summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/adfs.h1
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/dir_fplus.c9
-rw-r--r--fs/afs/main.c4
-rw-r--r--fs/aio.c19
-rw-r--r--fs/autofs4/autofs_i.h63
-rw-r--r--fs/autofs4/expire.c1
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c10
-rw-r--r--fs/befs/linuxvfs.c8
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/disk-io.c5
-rw-r--r--fs/btrfs/extent-tree.c5
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/btrfs/ioctl.c37
-rw-r--r--fs/btrfs/ordered-data.c11
-rw-r--r--fs/btrfs/print-tree.c9
-rw-r--r--fs/btrfs/raid56.c5
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/sysfs.c32
-rw-r--r--fs/btrfs/sysfs.h4
-rw-r--r--fs/btrfs/transaction.c12
-rw-r--r--fs/btrfs/volumes.c32
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifs_unicode.c7
-rw-r--r--fs/cifs/cifsfs.c17
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h19
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c119
-rw-r--r--fs/cifs/connect.c18
-rw-r--r--fs/cifs/file.c881
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/misc.c15
-rw-r--r--fs/cifs/sess.c1192
-rw-r--r--fs/cifs/smb1ops.c8
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/cifs/smb2misc.c6
-rw-r--r--fs/cifs/smb2ops.c73
-rw-r--r--fs/cifs/smb2pdu.c94
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/cifs/smb2transport.c5
-rw-r--r--fs/cifs/transport.c25
-rw-r--r--fs/coda/cache.c2
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c3
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/inode.c4
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/cramfs/inode.c45
-rw-r--r--fs/cramfs/uncompress.c10
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c39
-rw-r--r--fs/direct-io.c23
-rw-r--r--fs/dlm/debug_fs.c15
-rw-r--r--fs/efs/namei.c11
-rw-r--r--fs/exec.c10
-rw-r--r--fs/exofs/ore_raid.c2
-rw-r--r--fs/ext4/balloc.c17
-rw-r--r--fs/ext4/dir.c25
-rw-r--r--fs/ext4/ext4.h14
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/extents_status.c4
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/ialloc.c37
-rw-r--r--fs/ext4/indirect.c285
-rw-r--r--fs/ext4/inline.c18
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/mballoc.c53
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/super.c146
-rw-r--r--fs/f2fs/acl.c6
-rw-r--r--fs/f2fs/checkpoint.c178
-rw-r--r--fs/f2fs/data.c82
-rw-r--r--fs/f2fs/debug.c19
-rw-r--r--fs/f2fs/dir.c89
-rw-r--r--fs/f2fs/f2fs.h56
-rw-r--r--fs/f2fs/file.c57
-rw-r--r--fs/f2fs/gc.c7
-rw-r--r--fs/f2fs/hash.c4
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c13
-rw-r--r--fs/f2fs/namei.c259
-rw-r--r--fs/f2fs/node.c275
-rw-r--r--fs/f2fs/node.h7
-rw-r--r--fs/f2fs/recovery.c22
-rw-r--r--fs/f2fs/segment.c39
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c25
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/fs-writeback.c3
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/fscache/internal.h2
-rw-r--r--fs/fscache/main.c22
-rw-r--r--fs/fscache/page.c4
-rw-r--r--fs/fuse/dev.c51
-rw-r--r--fs/fuse/dir.c41
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/inode.c27
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c39
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/lock_dlm.c12
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/recovery.c8
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/gfs2/super.c8
-rw-r--r--fs/hpfs/dnode.c17
-rw-r--r--fs/inode.c8
-rw-r--r--fs/isofs/compress.c4
-rw-r--r--fs/jbd2/transaction.c15
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/compr_zlib.c7
-rw-r--r--fs/jffs2/xattr.c3
-rw-r--r--fs/kernfs/file.c71
-rw-r--r--fs/kernfs/mount.c30
-rw-r--r--fs/lockd/mon.c4
-rw-r--r--fs/locks.c26
-rw-r--r--fs/logfs/readwrite.c15
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/namei.c3
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c4
-rw-r--r--fs/nfs/idmap.c10
-rw-r--r--fs/nfs/inode.c82
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs3acl.c43
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4namespace.c102
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/pagelist.c34
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/write.c343
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nilfs2/Makefile2
-rw-r--r--fs/nilfs2/nilfs.h8
-rw-r--r--fs/nilfs2/super.c9
-rw-r--r--fs/nilfs2/sysfs.c1137
-rw-r--r--fs/nilfs2/sysfs.h176
-rw-r--r--fs/nilfs2/the_nilfs.c17
-rw-r--r--fs/nilfs2/the_nilfs.h20
-rw-r--r--fs/notify/fanotify/fanotify.c11
-rw-r--r--fs/notify/fanotify/fanotify_user.c14
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/notify/notification.c37
-rw-r--r--fs/notify/vfsmount_mark.c2
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ocfs2/alloc.c15
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c5
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c61
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c3
-rw-r--r--fs/ocfs2/dlm/dlmthread.c13
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c18
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/namei.c145
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/ocfs2/refcounttree.c10
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/ocfs2/super.c8
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/open.c5
-rw-r--r--fs/proc/array.c18
-rw-r--r--fs/proc/base.c181
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c32
-rw-r--r--fs/proc/internal.h9
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/proc_tty.c4
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/proc/stat.c22
-rw-r--r--fs/proc/task_mmu.c27
-rw-r--r--fs/proc/vmcore.c82
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/qnx6/Makefile1
-rw-r--r--fs/qnx6/dir.c26
-rw-r--r--fs/qnx6/inode.c99
-rw-r--r--fs/qnx6/namei.c6
-rw-r--r--fs/qnx6/qnx6.h12
-rw-r--r--fs/qnx6/super_mmi.c22
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c4
-rw-r--r--fs/reiserfs/lbalance.c2
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c2
-rw-r--r--fs/reiserfs/stree.c2
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c22
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/seq_file.c30
-rw-r--r--fs/squashfs/file_direct.c2
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/timerfd.c77
-rw-r--r--fs/ufs/Makefile1
-rw-r--r--fs/ufs/inode.c32
-rw-r--r--fs/ufs/super.c304
-rw-r--r--fs/ufs/ufs.h10
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/xfs_bmap.c7
-rw-r--r--fs/xfs/xfs_bmap.h4
-rw-r--r--fs/xfs/xfs_bmap_util.c53
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_btree.c82
-rw-r--r--fs/xfs/xfs_iomap.c3
-rw-r--r--fs/xfs/xfs_sb.c25
239 files changed, 6307 insertions, 2748 deletions
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index c770337c4b45..24575d9d882d 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -153,6 +153,7 @@ extern int adfs_map_lookup(struct super_block *sb, unsigned int frag_id, unsigne
extern unsigned int adfs_map_free(struct super_block *sb);
/* Misc */
+__printf(3, 4)
void __adfs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 0d138c0de293..51c279a29845 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -138,7 +138,7 @@ adfs_dir_lookup_byname(struct inode *inode, struct qstr *name, struct object_inf
goto out;
if (ADFS_I(inode)->parent_id != dir.parent_id) {
- adfs_error(sb, "parent directory changed under me! (%lx but got %lx)\n",
+ adfs_error(sb, "parent directory changed under me! (%lx but got %x)\n",
ADFS_I(inode)->parent_id, dir.parent_id);
ret = -EIO;
goto free_out;
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index d9e3bee4e653..f2ba88ab4aed 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -55,10 +55,10 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
}
size >>= sb->s_blocksize_bits;
- if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
+ if (size > ARRAY_SIZE(dir->bh)) {
/* this directory is too big for fixed bh set, must allocate */
struct buffer_head **bh_fplus =
- kzalloc(size * sizeof(struct buffer_head *),
+ kcalloc(size, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!bh_fplus) {
adfs_error(sb, "not enough memory for"
@@ -79,9 +79,8 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
dir->bh_fplus[blk] = sb_bread(sb, block);
if (!dir->bh_fplus[blk]) {
- adfs_error(sb, "dir object %X failed read for"
- " offset %d, mapped block %X",
- id, blk, block);
+ adfs_error(sb, "dir object %x failed read for offset %d, mapped block %lX",
+ id, blk, block);
goto out;
}
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 42dd2e499ed8..35de0c04729f 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -55,13 +55,13 @@ static int __init afs_get_client_UUID(void)
afs_uuid.time_low = uuidtime;
afs_uuid.time_mid = uuidtime >> 32;
afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
- afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME;
+ afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME;
get_random_bytes(&clockseq, 2);
afs_uuid.clock_seq_low = clockseq;
afs_uuid.clock_seq_hi_and_reserved =
(clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
- afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD;
+ afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD;
_debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
afs_uuid.time_low,
diff --git a/fs/aio.c b/fs/aio.c
index 4f078c054b41..bd7ec2cc2674 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work)
aio_free_ring(ctx);
free_percpu(ctx->cpu);
+ percpu_ref_exit(&ctx->reqs);
+ percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx);
}
@@ -715,8 +717,8 @@ err_ctx:
err:
mutex_unlock(&ctx->ring_lock);
free_percpu(ctx->cpu);
- free_percpu(ctx->reqs.pcpu_count);
- free_percpu(ctx->users.pcpu_count);
+ percpu_ref_exit(&ctx->reqs);
+ percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
@@ -830,16 +832,20 @@ void exit_aio(struct mm_struct *mm)
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
struct kioctx_cpu *kcpu;
+ unsigned long flags;
preempt_disable();
kcpu = this_cpu_ptr(ctx->cpu);
+ local_irq_save(flags);
kcpu->reqs_available += nr;
+
while (kcpu->reqs_available >= ctx->req_batch * 2) {
kcpu->reqs_available -= ctx->req_batch;
atomic_add(ctx->req_batch, &ctx->reqs_available);
}
+ local_irq_restore(flags);
preempt_enable();
}
@@ -847,10 +853,12 @@ static bool get_reqs_available(struct kioctx *ctx)
{
struct kioctx_cpu *kcpu;
bool ret = false;
+ unsigned long flags;
preempt_disable();
kcpu = this_cpu_ptr(ctx->cpu);
+ local_irq_save(flags);
if (!kcpu->reqs_available) {
int old, avail = atomic_read(&ctx->reqs_available);
@@ -869,6 +877,7 @@ static bool get_reqs_available(struct kioctx *ctx)
ret = true;
kcpu->reqs_available--;
out:
+ local_irq_restore(flags);
preempt_enable();
return ret;
}
@@ -1021,6 +1030,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
/* everything turned out well, dispose of the aiocb. */
kiocb_free(iocb);
+ put_reqs_available(ctx, 1);
/*
* We have to order our ring_info tail store above and test
@@ -1062,6 +1072,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
if (head == tail)
goto out;
+ head %= ctx->nr_events;
+ tail %= ctx->nr_events;
+
while (ret < nr) {
long avail;
struct io_event *ev;
@@ -1100,8 +1113,6 @@ static long aio_read_events_ring(struct kioctx *ctx,
flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, tail);
-
- put_reqs_available(ctx, ret);
out:
mutex_unlock(&ctx->ring_lock);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index acf32054edd8..9e359fb20c0a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -143,20 +143,6 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
}
-/* Does a dentry have some pending activity? */
-static inline int autofs4_ispending(struct dentry *dentry)
-{
- struct autofs_info *inf = autofs4_dentry_ino(dentry);
-
- if (inf->flags & AUTOFS_INF_PENDING)
- return 1;
-
- if (inf->flags & AUTOFS_INF_EXPIRING)
- return 1;
-
- return 0;
-}
-
struct inode *autofs4_get_inode(struct super_block *, umode_t);
void autofs4_free_ino(struct autofs_info *);
@@ -191,55 +177,6 @@ extern const struct file_operations autofs4_root_operations;
extern const struct dentry_operations autofs4_dentry_operations;
/* VFS automount flags management functions */
-
-static inline void __managed_dentry_set_automount(struct dentry *dentry)
-{
- dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-}
-
-static inline void managed_dentry_set_automount(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_set_automount(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_clear_automount(struct dentry *dentry)
-{
- dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
-}
-
-static inline void managed_dentry_clear_automount(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_clear_automount(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_set_transit(struct dentry *dentry)
-{
- dentry->d_flags |= DCACHE_MANAGE_TRANSIT;
-}
-
-static inline void managed_dentry_set_transit(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_set_transit(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_clear_transit(struct dentry *dentry)
-{
- dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT;
-}
-
-static inline void managed_dentry_clear_transit(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_clear_transit(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
static inline void __managed_dentry_set_managed(struct dentry *dentry)
{
dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 394e90b02c5e..a7be57e39be7 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -333,7 +333,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
if (ino->flags & AUTOFS_INF_PENDING)
goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
- struct autofs_info *ino = autofs4_dentry_ino(root);
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d7bd395ab586..1c55388ae633 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -210,7 +210,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
- int pgrp;
+ int pgrp = 0;
bool pgrp_set = false;
int ret = -EINVAL;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index cc87c1abac97..cdb25ebccc4c 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -166,8 +166,10 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&sbi->lookup_lock);
head = &sbi->active_list;
+ if (list_empty(head))
+ return NULL;
+ spin_lock(&sbi->lookup_lock);
list_for_each(p, head) {
struct autofs_info *ino;
struct dentry *active;
@@ -218,8 +220,10 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&sbi->lookup_lock);
head = &sbi->expiring_list;
+ if (list_empty(head))
+ return NULL;
+ spin_lock(&sbi->lookup_lock);
list_for_each(p, head) {
struct autofs_info *ino;
struct dentry *expiring;
@@ -373,7 +377,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
* this because the leaves of the directory tree under the
* mount never trigger mounts themselves (they have an autofs
* trigger mount mounted on them). But v4 pseudo direct mounts
- * do need the leaves to to trigger mounts. In this case we
+ * do need the leaves to trigger mounts. In this case we
* have no choice but to use the list_empty() check and
* require user space behave.
*/
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index a16fbd4e8241..4cf61ec6b7a8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -799,13 +799,11 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
befs_debug(sb, "---> %s", __func__);
-#ifndef CONFIG_BEFS_RW
if (!(sb->s_flags & MS_RDONLY)) {
befs_warning(sb,
"No write support. Marking filesystem read-only");
sb->s_flags |= MS_RDONLY;
}
-#endif /* CONFIG_BEFS_RW */
/*
* Set dummy blocksize to read super block.
@@ -834,16 +832,14 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
(befs_super_block *) ((void *) bh->b_data + x86_sb_off);
}
- if (befs_load_sb(sb, disk_sb) != BEFS_OK)
+ if ((befs_load_sb(sb, disk_sb) != BEFS_OK) ||
+ (befs_check_sb(sb) != BEFS_OK))
goto unacquire_bh;
befs_dump_super_block(sb, disk_sb);
brelse(bh);
- if (befs_check_sb(sb) != BEFS_OK)
- goto unacquire_priv_sbp;
-
if( befs_sb->num_blocks > ~((sector_t)0) ) {
befs_error(sb, "blocks count: %llu "
"is larger than the host can use",
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index f7f87e233dd9..f40006db36df 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -46,6 +46,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
/* inode.c */
extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
+extern void bfs_dump_imap(const char *, struct super_block *);
/* file.c */
extern const struct inode_operations bfs_file_inops;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index a399e6d9dc74..08063ae0a17c 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -75,8 +75,6 @@ const struct file_operations bfs_dir_operations = {
.llseek = generic_file_llseek,
};
-extern void dump_imap(const char *, struct super_block *);
-
static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
@@ -110,7 +108,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
BFS_I(inode)->i_eblock = 0;
insert_inode_hash(inode);
mark_inode_dirty(inode);
- dump_imap("create", s);
+ bfs_dump_imap("create", s);
err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
inode->i_ino);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 7041ac35ace8..90bc079d9982 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,8 +30,6 @@ MODULE_LICENSE("GPL");
#define dprintf(x...)
#endif
-void dump_imap(const char *prefix, struct super_block *s);
-
struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
{
struct bfs_inode *di;
@@ -194,7 +192,7 @@ static void bfs_evict_inode(struct inode *inode)
info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
info->si_freei++;
clear_bit(ino, info->si_imap);
- dump_imap("delete_inode", s);
+ bfs_dump_imap("delete_inode", s);
}
/*
@@ -297,7 +295,7 @@ static const struct super_operations bfs_sops = {
.statfs = bfs_statfs,
};
-void dump_imap(const char *prefix, struct super_block *s)
+void bfs_dump_imap(const char *prefix, struct super_block *s)
{
#ifdef DEBUG
int i;
@@ -443,7 +441,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
}
brelse(bh);
brelse(sbh);
- dump_imap("read_super", s);
+ bfs_dump_imap("read_super", s);
return 0;
out3:
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 92371c414228..1daea0b47187 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -821,7 +821,7 @@ static void free_workspace(int type, struct list_head *workspace)
spin_lock(workspace_lock);
if (*num_workspace < num_online_cpus()) {
- list_add_tail(workspace, idle_workspace);
+ list_add(workspace, idle_workspace);
(*num_workspace)++;
spin_unlock(workspace_lock);
goto wake;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2af6e66fe788..eea26e1b2fda 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -36,6 +36,7 @@
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
+#include "sysfs.h"
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
@@ -562,6 +563,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
+ /* replace the sysfs entry */
+ btrfs_kobj_rm_device(fs_info, src_device);
+ btrfs_kobj_add_device(fs_info, tgt_device);
+
btrfs_rm_dev_replace_blocked(fs_info);
btrfs_rm_dev_replace_srcdev(fs_info, src_device);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8bb4aa19898f..08e65e9cf2aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -369,7 +369,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS);
- btrfs_tree_read_unlock_blocking(eb);
+ if (need_lock)
+ btrfs_tree_read_unlock_blocking(eb);
return ret;
}
@@ -2904,7 +2905,9 @@ retry_root_backup:
if (ret)
goto fail_qgroup;
+ mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(tree_root);
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
printk(KERN_WARNING
"BTRFS: failed to recover relocation\n");
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 99c253918208..813537f362f9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5678,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
struct btrfs_block_group_cache *cache;
- struct btrfs_space_info *space_info;
down_write(&fs_info->commit_root_sem);
@@ -5701,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
up_write(&fs_info->commit_root_sem);
- list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
- percpu_counter_set(&space_info->total_bytes_pinned, 0);
-
update_global_block_rsv(fs_info);
}
@@ -5741,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
spin_lock(&cache->lock);
cache->pinned -= len;
space_info->bytes_pinned -= len;
+ percpu_counter_add(&space_info->total_bytes_pinned, -len);
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a389820d158b..3e11aab9f391 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3437,16 +3437,10 @@ done_unlocked:
return 0;
}
-static int eb_wait(void *word)
-{
- io_schedule();
- return 0;
-}
-
void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
{
- wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
+ TASK_UNINTERRUPTIBLE);
}
static noinline_for_stack int
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0d321c23069a..47aceb494d1d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -136,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
void btrfs_update_iflags(struct inode *inode)
{
struct btrfs_inode *ip = BTRFS_I(inode);
-
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ unsigned int new_fl = 0;
if (ip->flags & BTRFS_INODE_SYNC)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (ip->flags & BTRFS_INODE_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (ip->flags & BTRFS_INODE_APPEND)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (ip->flags & BTRFS_INODE_NOATIME)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (ip->flags & BTRFS_INODE_DIRSYNC)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+
+ set_mask_bits(&inode->i_flags,
+ S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
+ new_fl);
}
/*
@@ -3139,7 +3142,6 @@ out:
static void clone_update_extent_map(struct inode *inode,
const struct btrfs_trans_handle *trans,
const struct btrfs_path *path,
- struct btrfs_file_extent_item *fi,
const u64 hole_offset,
const u64 hole_len)
{
@@ -3154,7 +3156,11 @@ static void clone_update_extent_map(struct inode *inode,
return;
}
- if (fi) {
+ if (path) {
+ struct btrfs_file_extent_item *fi;
+
+ fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
em->generation = -1;
if (btrfs_file_extent_type(path->nodes[0], fi) ==
@@ -3508,18 +3514,15 @@ process_slot:
btrfs_item_ptr_offset(leaf, slot),
size);
inode_add_bytes(inode, datal);
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
}
/* If we have an implicit hole (NO_HOLES feature). */
if (drop_start < new_key.offset)
clone_update_extent_map(inode, trans,
- path, NULL, drop_start,
+ NULL, drop_start,
new_key.offset - drop_start);
- clone_update_extent_map(inode, trans, path,
- extent, 0, 0);
+ clone_update_extent_map(inode, trans, path, 0, 0);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -3562,12 +3565,10 @@ process_slot:
btrfs_end_transaction(trans, root);
goto out;
}
+ clone_update_extent_map(inode, trans, NULL, last_dest_end,
+ destoff + len - last_dest_end);
ret = clone_finish_inode_update(trans, inode, destoff + len,
destoff, olen);
- if (ret)
- goto out;
- clone_update_extent_map(inode, trans, path, NULL, last_dest_end,
- destoff + len - last_dest_end);
}
out:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e12441c7cf1d..7187b14faa6c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -484,8 +484,19 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
log_list);
list_del_init(&ordered->log_list);
spin_unlock_irq(&log->log_extents_lock[index]);
+
+ if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
+ !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
+ struct inode *inode = ordered->inode;
+ u64 start = ordered->file_offset;
+ u64 end = ordered->file_offset + ordered->len - 1;
+
+ WARN_ON(!inode);
+ filemap_fdatawrite_range(inode->i_mapping, start, end);
+ }
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
+
btrfs_put_ordered_extent(ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 6efd70d3b64f..9626b4ad3b9a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -54,7 +54,7 @@ static void print_extent_data_ref(struct extent_buffer *eb,
btrfs_extent_data_ref_count(eb, ref));
}
-static void print_extent_item(struct extent_buffer *eb, int slot)
+static void print_extent_item(struct extent_buffer *eb, int slot, int type)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
@@ -63,7 +63,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
struct btrfs_disk_key key;
unsigned long end;
unsigned long ptr;
- int type;
u32 item_size = btrfs_item_size_nr(eb, slot);
u64 flags;
u64 offset;
@@ -88,7 +87,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
flags);
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ if ((type == BTRFS_EXTENT_ITEM_KEY) &&
+ flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
btrfs_tree_block_key(eb, info, &key);
@@ -223,7 +223,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_disk_root_refs(l, ri));
break;
case BTRFS_EXTENT_ITEM_KEY:
- print_extent_item(l, i);
+ case BTRFS_METADATA_ITEM_KEY:
+ print_extent_item(l, i, type);
break;
case BTRFS_TREE_BLOCK_REF_KEY:
printk(KERN_INFO "\t\ttree block backref\n");
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4055291a523e..4a88f073fdd7 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1956,9 +1956,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
* pages are going to be uptodate.
*/
for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
- if (rbio->faila == stripe ||
- rbio->failb == stripe)
+ if (rbio->faila == stripe || rbio->failb == stripe) {
+ atomic_inc(&rbio->bbio->error);
continue;
+ }
for (pagenr = 0; pagenr < nr_pages; pagenr++) {
struct page *p;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4662d92a4b73..8e16bca69c56 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -522,9 +522,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_ssd_spread:
btrfs_set_and_info(root, SSD_SPREAD,
"use spread ssd allocation scheme");
+ btrfs_set_opt(info->mount_opt, SSD);
break;
case Opt_nossd:
- btrfs_clear_and_info(root, NOSSD,
+ btrfs_set_and_info(root, NOSSD,
"not using ssd allocation scheme");
btrfs_clear_opt(info->mount_opt, SSD);
break;
@@ -1467,7 +1468,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
/* recover relocation */
+ mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(root);
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret)
goto restore;
@@ -1808,6 +1811,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
list_for_each_entry(dev, head, dev_list) {
if (dev->missing)
continue;
+ if (!dev->name)
+ continue;
if (!first_dev || dev->devid < first_dev->devid)
first_dev = dev;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index df39458f1487..78699364f537 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -605,14 +605,37 @@ static void init_feature_attrs(void)
}
}
-static int add_device_membership(struct btrfs_fs_info *fs_info)
+int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device)
+{
+ struct hd_struct *disk;
+ struct kobject *disk_kobj;
+
+ if (!fs_info->device_dir_kobj)
+ return -EINVAL;
+
+ if (one_device) {
+ disk = one_device->bdev->bd_part;
+ disk_kobj = &part_to_dev(disk)->kobj;
+
+ sysfs_remove_link(fs_info->device_dir_kobj,
+ disk_kobj->name);
+ }
+
+ return 0;
+}
+
+int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device)
{
int error = 0;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *dev;
- fs_info->device_dir_kobj = kobject_create_and_add("devices",
+ if (!fs_info->device_dir_kobj)
+ fs_info->device_dir_kobj = kobject_create_and_add("devices",
&fs_info->super_kobj);
+
if (!fs_info->device_dir_kobj)
return -ENOMEM;
@@ -623,6 +646,9 @@ static int add_device_membership(struct btrfs_fs_info *fs_info)
if (!dev->bdev)
continue;
+ if (one_device && one_device != dev)
+ continue;
+
disk = dev->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
@@ -666,7 +692,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
if (error)
goto failure;
- error = add_device_membership(fs_info);
+ error = btrfs_kobj_add_device(fs_info, NULL);
if (error)
goto failure;
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9ab576318a84..ac46df37504c 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -66,4 +66,8 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
extern const char * const btrfs_feature_set_names[3];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
+int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device);
+int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device);
#endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 511839c04f11..5f379affdf23 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -386,11 +386,13 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
bool reloc_reserved = false;
int ret;
+ /* Send isn't supposed to start transactions. */
+ ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB);
+
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return ERR_PTR(-EROFS);
- if (current->journal_info &&
- current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
+ if (current->journal_info) {
WARN_ON(type & TRANS_EXTWRITERS);
h = current->journal_info;
h->use_count++;
@@ -491,6 +493,7 @@ again:
smp_mb();
if (cur_trans->state >= TRANS_STATE_BLOCKED &&
may_wait_transaction(root, type)) {
+ current->journal_info = h;
btrfs_commit_transaction(h, root);
goto again;
}
@@ -1615,11 +1618,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_run_delayed_items(trans, root);
- /*
- * running the delayed items may have added new refs. account
- * them now so that they hinder processing of more delayed refs
- * as little as possible.
- */
if (ret)
return ret;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c83b24251e53..6cb82f62cb7c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,7 @@
#include "rcu-string.h"
#include "math.h"
#include "dev-replace.h"
+#include "sysfs.h"
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -554,12 +555,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
* This is ok to do without rcu read locked because we hold the
* uuid mutex so nothing we touch in here is going to disappear.
*/
- name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
- if (!name) {
- kfree(device);
- goto error;
+ if (orig_dev->name) {
+ name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+ if (!name) {
+ kfree(device);
+ goto error;
+ }
+ rcu_assign_pointer(device->name, name);
}
- rcu_assign_pointer(device->name, name);
list_add(&device->dev_list, &fs_devices->devices);
device->fs_devices = fs_devices;
@@ -1677,8 +1680,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
if (device->bdev == root->fs_info->fs_devices->latest_bdev)
root->fs_info->fs_devices->latest_bdev = next_device->bdev;
- if (device->bdev)
+ if (device->bdev) {
device->fs_devices->open_devices--;
+ /* remove sysfs entry */
+ btrfs_kobj_rm_device(root->fs_info, device);
+ }
call_rcu(&device->rcu, free_device);
@@ -2143,9 +2149,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
btrfs_set_super_num_devices(root->fs_info->super_copy,
total_bytes + 1);
+
+ /* add sysfs device entry */
+ btrfs_kobj_add_device(root->fs_info, device);
+
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
+ char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
ret = init_first_rw_device(trans, root, device);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -2156,6 +2167,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
btrfs_abort_transaction(trans, root, ret);
goto error_trans;
}
+
+ /* Sprouting would change fsid of the mounted root,
+ * so rename the fsid on the sysfs
+ */
+ snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
+ root->fs_info->fsid);
+ if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
+ goto error_trans;
} else {
ret = btrfs_add_device(trans, root, device);
if (ret) {
@@ -2205,6 +2224,7 @@ error_trans:
unlock_chunks(root);
btrfs_end_transaction(trans, root);
rcu_string_free(device->name);
+ btrfs_kobj_rm_device(root->fs_info, device);
kfree(device);
error:
blkdev_put(bdev, FMODE_EXCL);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 4f196314c0c1..b67d8fc81277 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws,
if (workspace->def_strm.total_in > 8192 &&
workspace->def_strm.total_in <
workspace->def_strm.total_out) {
- ret = -EIO;
+ ret = -E2BIG;
goto out;
}
/* we need another page for writing out. Test this
diff --git a/fs/buffer.c b/fs/buffer.c
index eba6e4f621ce..8f05111bbb8b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -61,16 +61,9 @@ inline void touch_buffer(struct buffer_head *bh)
}
EXPORT_SYMBOL(touch_buffer);
-static int sleep_on_buffer(void *word)
-{
- io_schedule();
- return 0;
-}
-
void __lock_buffer(struct buffer_head *bh)
{
- wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);
@@ -123,7 +116,7 @@ EXPORT_SYMBOL(buffer_check_dirty_writeback);
*/
void __wait_on_buffer(struct buffer_head * bh)
{
- wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index f3ac4154cbb6..44ec72684df5 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
tcon->nativeFileSystem);
}
seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
- "\n\tPathComponentMax: %d Status: 0x%d",
+ "\n\tPathComponentMax: %d Status: %d",
le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
le32_to_cpu(tcon->fsAttrInfo.Attributes),
le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0227b45ef00a..15e9505aa35f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -290,7 +290,8 @@ int
cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
const struct nls_table *cp, int mapChars)
{
- int i, j, charlen;
+ int i, charlen;
+ int j = 0;
char src_char;
__le16 dst_char;
wchar_t tmp;
@@ -298,12 +299,11 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
if (!mapChars)
return cifs_strtoUTF16(target, source, PATH_MAX, cp);
- for (i = 0, j = 0; i < srclen; j++) {
+ for (i = 0; i < srclen; j++) {
src_char = source[i];
charlen = 1;
switch (src_char) {
case 0:
- put_unaligned(0, &target[j]);
goto ctoUTF16_out;
case ':':
dst_char = cpu_to_le16(UNI_COLON);
@@ -350,6 +350,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
}
ctoUTF16_out:
+ put_unaligned(0, &target[j]); /* Null terminate target unicode string */
return j;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2c90d07c0b3a..888398067420 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -725,6 +725,19 @@ out_nls:
goto out;
}
+static ssize_t
+cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ ssize_t rc;
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ rc = cifs_revalidate_mapping(inode);
+ if (rc)
+ return rc;
+
+ return generic_file_read_iter(iocb, iter);
+}
+
static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
@@ -881,7 +894,7 @@ const struct inode_operations cifs_symlink_inode_ops = {
const struct file_operations cifs_file_ops = {
.read = new_sync_read,
.write = new_sync_write,
- .read_iter = generic_file_read_iter,
+ .read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
.open = cifs_open,
.release = cifs_close,
@@ -939,7 +952,7 @@ const struct file_operations cifs_file_direct_ops = {
const struct file_operations cifs_file_nobrl_ops = {
.read = new_sync_read,
.write = new_sync_write,
- .read_iter = generic_file_read_iter,
+ .read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
.open = cifs_open,
.release = cifs_close,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 70f178a7c759..560480263336 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.03"
+#define CIFS_VERSION "2.04"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index de6aed8c78e5..0012e1e291d4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -404,6 +404,11 @@ struct smb_version_operations {
const struct cifs_fid *, u32 *);
int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
int);
+ /* writepages retry size */
+ unsigned int (*wp_retry_size)(struct inode *);
+ /* get mtu credits */
+ int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int,
+ unsigned int *, unsigned int *);
};
struct smb_version_values {
@@ -640,6 +645,16 @@ add_credits(struct TCP_Server_Info *server, const unsigned int add,
}
static inline void
+add_credits_and_wake_if(struct TCP_Server_Info *server, const unsigned int add,
+ const int optype)
+{
+ if (add) {
+ server->ops->add_credits(server, add, optype);
+ wake_up(&server->request_q);
+ }
+}
+
+static inline void
set_credits(struct TCP_Server_Info *server, const int val)
{
server->ops->set_credits(server, val);
@@ -1044,6 +1059,7 @@ struct cifs_readdata {
struct address_space *mapping;
__u64 offset;
unsigned int bytes;
+ unsigned int got_bytes;
pid_t pid;
int result;
struct work_struct work;
@@ -1053,6 +1069,7 @@ struct cifs_readdata {
struct kvec iov;
unsigned int pagesz;
unsigned int tailsz;
+ unsigned int credits;
unsigned int nr_pages;
struct page *pages[];
};
@@ -1073,6 +1090,7 @@ struct cifs_writedata {
int result;
unsigned int pagesz;
unsigned int tailsz;
+ unsigned int credits;
unsigned int nr_pages;
struct page *pages[];
};
@@ -1398,6 +1416,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
#define CIFS_OBREAK_OP 0x0100 /* oplock break request */
#define CIFS_NEG_OP 0x0200 /* negotiate request */
#define CIFS_OP_MASK 0x0380 /* mask request type */
+#define CIFS_HAS_CREDITS 0x0400 /* already has credits */
/* Security Flags: indicate type of session setup needed */
#define CIFSSEC_MAY_SIGN 0x00001
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ca7980a1e303..c31ce98c1704 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,6 +36,7 @@ extern struct smb_hdr *cifs_buf_get(void);
extern void cifs_buf_release(void *);
extern struct smb_hdr *cifs_small_buf_get(void);
extern void cifs_small_buf_release(void *);
+extern void free_rsp_buf(int, void *);
extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
struct kvec *iov);
extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
@@ -89,6 +90,9 @@ extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *,
struct smb_rqst *);
extern int cifs_check_receive(struct mid_q_entry *mid,
struct TCP_Server_Info *server, bool log_error);
+extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server,
+ unsigned int size, unsigned int *num,
+ unsigned int *credits);
extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
struct kvec *, int /* nvec to send */,
int * /* type of buf returned */ , const int flags);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6ce4e0954b98..66f65001a6d8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -196,10 +196,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
if (rc)
goto out;
- /*
- * FIXME: check if wsize needs updated due to negotiated smb buffer
- * size shrinking
- */
atomic_inc(&tconInfoReconnectCount);
/* tell server Unix caps we support */
@@ -1517,7 +1513,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
return length;
server->total_read += length;
- rdata->bytes = length;
cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
server->total_read, buflen, data_len);
@@ -1560,12 +1555,18 @@ cifs_readv_callback(struct mid_q_entry *mid)
rc);
}
/* FIXME: should this be counted toward the initiating task? */
- task_io_account_read(rdata->bytes);
- cifs_stats_bytes_read(tcon, rdata->bytes);
+ task_io_account_read(rdata->got_bytes);
+ cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
rdata->result = -EAGAIN;
+ if (server->sign && rdata->got_bytes)
+ /* reset bytes number since we can not check a sign */
+ rdata->got_bytes = 0;
+ /* FIXME: should this be counted toward the initiating task? */
+ task_io_account_read(rdata->got_bytes);
+ cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
default:
rdata->result = -EIO;
@@ -1734,10 +1735,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
if (*buf) {
- if (resp_buf_type == CIFS_SMALL_BUFFER)
- cifs_small_buf_release(iov[0].iov_base);
- else if (resp_buf_type == CIFS_LARGE_BUFFER)
- cifs_buf_release(iov[0].iov_base);
+ free_rsp_buf(resp_buf_type, iov[0].iov_base);
} else if (resp_buf_type != CIFS_NO_BUFFER) {
/* return buffer to caller to free */
*buf = iov[0].iov_base;
@@ -1899,28 +1897,80 @@ cifs_writedata_release(struct kref *refcount)
static void
cifs_writev_requeue(struct cifs_writedata *wdata)
{
- int i, rc;
+ int i, rc = 0;
struct inode *inode = wdata->cfile->dentry->d_inode;
struct TCP_Server_Info *server;
+ unsigned int rest_len;
- for (i = 0; i < wdata->nr_pages; i++) {
- lock_page(wdata->pages[i]);
- clear_page_dirty_for_io(wdata->pages[i]);
- }
-
+ server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+ i = 0;
+ rest_len = wdata->bytes;
do {
- server = tlink_tcon(wdata->cfile->tlink)->ses->server;
- rc = server->ops->async_writev(wdata, cifs_writedata_release);
- } while (rc == -EAGAIN);
+ struct cifs_writedata *wdata2;
+ unsigned int j, nr_pages, wsize, tailsz, cur_len;
+
+ wsize = server->ops->wp_retry_size(inode);
+ if (wsize < rest_len) {
+ nr_pages = wsize / PAGE_CACHE_SIZE;
+ if (!nr_pages) {
+ rc = -ENOTSUPP;
+ break;
+ }
+ cur_len = nr_pages * PAGE_CACHE_SIZE;
+ tailsz = PAGE_CACHE_SIZE;
+ } else {
+ nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE);
+ cur_len = rest_len;
+ tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE;
+ }
- for (i = 0; i < wdata->nr_pages; i++) {
- unlock_page(wdata->pages[i]);
- if (rc != 0) {
- SetPageError(wdata->pages[i]);
- end_page_writeback(wdata->pages[i]);
- page_cache_release(wdata->pages[i]);
+ wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
+ if (!wdata2) {
+ rc = -ENOMEM;
+ break;
}
- }
+
+ for (j = 0; j < nr_pages; j++) {
+ wdata2->pages[j] = wdata->pages[i + j];
+ lock_page(wdata2->pages[j]);
+ clear_page_dirty_for_io(wdata2->pages[j]);
+ }
+
+ wdata2->sync_mode = wdata->sync_mode;
+ wdata2->nr_pages = nr_pages;
+ wdata2->offset = page_offset(wdata2->pages[0]);
+ wdata2->pagesz = PAGE_CACHE_SIZE;
+ wdata2->tailsz = tailsz;
+ wdata2->bytes = cur_len;
+
+ wdata2->cfile = find_writable_file(CIFS_I(inode), false);
+ if (!wdata2->cfile) {
+ cifs_dbg(VFS, "No writable handles for inode\n");
+ rc = -EBADF;
+ break;
+ }
+ wdata2->pid = wdata2->cfile->pid;
+ rc = server->ops->async_writev(wdata2, cifs_writedata_release);
+
+ for (j = 0; j < nr_pages; j++) {
+ unlock_page(wdata2->pages[j]);
+ if (rc != 0 && rc != -EAGAIN) {
+ SetPageError(wdata2->pages[j]);
+ end_page_writeback(wdata2->pages[j]);
+ page_cache_release(wdata2->pages[j]);
+ }
+ }
+
+ if (rc) {
+ kref_put(&wdata2->refcount, cifs_writedata_release);
+ if (rc == -EAGAIN)
+ continue;
+ break;
+ }
+
+ rest_len -= cur_len;
+ i += nr_pages;
+ } while (i < wdata->nr_pages);
mapping_set_error(inode->i_mapping, rc);
kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2203,10 +2253,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
}
/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
- if (resp_buf_type == CIFS_SMALL_BUFFER)
- cifs_small_buf_release(iov[0].iov_base);
- else if (resp_buf_type == CIFS_LARGE_BUFFER)
- cifs_buf_release(iov[0].iov_base);
+ free_rsp_buf(resp_buf_type, iov[0].iov_base);
/* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
@@ -2451,10 +2498,7 @@ plk_err_exit:
if (pSMB)
cifs_small_buf_release(pSMB);
- if (resp_buf_type == CIFS_SMALL_BUFFER)
- cifs_small_buf_release(iov[0].iov_base);
- else if (resp_buf_type == CIFS_LARGE_BUFFER)
- cifs_buf_release(iov[0].iov_base);
+ free_rsp_buf(resp_buf_type, iov[0].iov_base);
/* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
@@ -3838,10 +3882,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
}
}
qsec_out:
- if (buf_type == CIFS_SMALL_BUFFER)
- cifs_small_buf_release(iov[0].iov_base);
- else if (buf_type == CIFS_LARGE_BUFFER)
- cifs_buf_release(iov[0].iov_base);
+ free_rsp_buf(buf_type, iov[0].iov_base);
/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
return rc;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 20d75b8ddb26..03ed8a09581c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -557,7 +557,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
try_to_freeze();
if (server_unresponsive(server)) {
- total_read = -EAGAIN;
+ total_read = -ECONNABORTED;
break;
}
@@ -571,7 +571,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
break;
} else if (server->tcpStatus == CifsNeedReconnect) {
cifs_reconnect(server);
- total_read = -EAGAIN;
+ total_read = -ECONNABORTED;
break;
} else if (length == -ERESTARTSYS ||
length == -EAGAIN ||
@@ -588,7 +588,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
cifs_dbg(FYI, "Received no data or error: expecting %d\n"
"got %d", to_read, length);
cifs_reconnect(server);
- total_read = -EAGAIN;
+ total_read = -ECONNABORTED;
break;
}
}
@@ -786,7 +786,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
cifs_reconnect(server);
wake_up(&server->response_q);
- return -EAGAIN;
+ return -ECONNABORTED;
}
/* switch to large buffer if too big for a small one */
@@ -3934,13 +3934,6 @@ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
}
-static int
-cifs_sb_tcon_pending_wait(void *unused)
-{
- schedule();
- return signal_pending(current) ? -ERESTARTSYS : 0;
-}
-
/* find and return a tlink with given uid */
static struct tcon_link *
tlink_rb_search(struct rb_root *root, kuid_t uid)
@@ -4039,11 +4032,10 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
} else {
wait_for_construction:
ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
- cifs_sb_tcon_pending_wait,
TASK_INTERRUPTIBLE);
if (ret) {
cifs_put_tlink(tlink);
- return ERR_PTR(ret);
+ return ERR_PTR(-ERESTARTSYS);
}
/* if it's good, return it */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e90a1e9aa627..4ab2f79ffa7a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1670,8 +1670,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
break;
}
- len = min((size_t)cifs_sb->wsize,
- write_size - total_written);
+ len = min(server->ops->wp_retry_size(dentry->d_inode),
+ (unsigned int)write_size - total_written);
/* iov[0] is reserved for smb header */
iov[1].iov_base = (char *)write_data + total_written;
iov[1].iov_len = len;
@@ -1878,15 +1878,163 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
return rc;
}
+static struct cifs_writedata *
+wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
+ pgoff_t end, pgoff_t *index,
+ unsigned int *found_pages)
+{
+ unsigned int nr_pages;
+ struct page **pages;
+ struct cifs_writedata *wdata;
+
+ wdata = cifs_writedata_alloc((unsigned int)tofind,
+ cifs_writev_complete);
+ if (!wdata)
+ return NULL;
+
+ /*
+ * find_get_pages_tag seems to return a max of 256 on each
+ * iteration, so we must call it several times in order to
+ * fill the array or the wsize is effectively limited to
+ * 256 * PAGE_CACHE_SIZE.
+ */
+ *found_pages = 0;
+ pages = wdata->pages;
+ do {
+ nr_pages = find_get_pages_tag(mapping, index,
+ PAGECACHE_TAG_DIRTY, tofind,
+ pages);
+ *found_pages += nr_pages;
+ tofind -= nr_pages;
+ pages += nr_pages;
+ } while (nr_pages && tofind && *index <= end);
+
+ return wdata;
+}
+
+static unsigned int
+wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
+ struct address_space *mapping,
+ struct writeback_control *wbc,
+ pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
+{
+ unsigned int nr_pages = 0, i;
+ struct page *page;
+
+ for (i = 0; i < found_pages; i++) {
+ page = wdata->pages[i];
+ /*
+ * At this point we hold neither mapping->tree_lock nor
+ * lock on the page itself: the page may be truncated or
+ * invalidated (changing page->mapping to NULL), or even
+ * swizzled back from swapper_space to tmpfs file
+ * mapping
+ */
+
+ if (nr_pages == 0)
+ lock_page(page);
+ else if (!trylock_page(page))
+ break;
+
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ break;
+ }
+
+ if (!wbc->range_cyclic && page->index > end) {
+ *done = true;
+ unlock_page(page);
+ break;
+ }
+
+ if (*next && (page->index != *next)) {
+ /* Not next consecutive page */
+ unlock_page(page);
+ break;
+ }
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ if (PageWriteback(page) ||
+ !clear_page_dirty_for_io(page)) {
+ unlock_page(page);
+ break;
+ }
+
+ /*
+ * This actually clears the dirty bit in the radix tree.
+ * See cifs_writepage() for more commentary.
+ */
+ set_page_writeback(page);
+ if (page_offset(page) >= i_size_read(mapping->host)) {
+ *done = true;
+ unlock_page(page);
+ end_page_writeback(page);
+ break;
+ }
+
+ wdata->pages[i] = page;
+ *next = page->index + 1;
+ ++nr_pages;
+ }
+
+ /* reset index to refind any pages skipped */
+ if (nr_pages == 0)
+ *index = wdata->pages[0]->index + 1;
+
+ /* put any pages we aren't going to use */
+ for (i = nr_pages; i < found_pages; i++) {
+ page_cache_release(wdata->pages[i]);
+ wdata->pages[i] = NULL;
+ }
+
+ return nr_pages;
+}
+
+static int
+wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
+ struct address_space *mapping, struct writeback_control *wbc)
+{
+ int rc = 0;
+ struct TCP_Server_Info *server;
+ unsigned int i;
+
+ wdata->sync_mode = wbc->sync_mode;
+ wdata->nr_pages = nr_pages;
+ wdata->offset = page_offset(wdata->pages[0]);
+ wdata->pagesz = PAGE_CACHE_SIZE;
+ wdata->tailsz = min(i_size_read(mapping->host) -
+ page_offset(wdata->pages[nr_pages - 1]),
+ (loff_t)PAGE_CACHE_SIZE);
+ wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
+
+ if (wdata->cfile != NULL)
+ cifsFileInfo_put(wdata->cfile);
+ wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
+ if (!wdata->cfile) {
+ cifs_dbg(VFS, "No writable handles for inode\n");
+ rc = -EBADF;
+ } else {
+ wdata->pid = wdata->cfile->pid;
+ server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+ rc = server->ops->async_writev(wdata, cifs_writedata_release);
+ }
+
+ for (i = 0; i < nr_pages; ++i)
+ unlock_page(wdata->pages[i]);
+
+ return rc;
+}
+
static int cifs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
+ struct TCP_Server_Info *server;
bool done = false, scanned = false, range_whole = false;
pgoff_t end, index;
struct cifs_writedata *wdata;
- struct TCP_Server_Info *server;
- struct page *page;
int rc = 0;
/*
@@ -1906,152 +2054,50 @@ static int cifs_writepages(struct address_space *mapping,
range_whole = true;
scanned = true;
}
+ server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
while (!done && index <= end) {
- unsigned int i, nr_pages, found_pages;
- pgoff_t next = 0, tofind;
- struct page **pages;
+ unsigned int i, nr_pages, found_pages, wsize, credits;
+ pgoff_t next = 0, tofind, saved_index = index;
+
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+ &wsize, &credits);
+ if (rc)
+ break;
- tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
- end - index) + 1;
+ tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
- wdata = cifs_writedata_alloc((unsigned int)tofind,
- cifs_writev_complete);
+ wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
+ &found_pages);
if (!wdata) {
rc = -ENOMEM;
+ add_credits_and_wake_if(server, credits, 0);
break;
}
- /*
- * find_get_pages_tag seems to return a max of 256 on each
- * iteration, so we must call it several times in order to
- * fill the array or the wsize is effectively limited to
- * 256 * PAGE_CACHE_SIZE.
- */
- found_pages = 0;
- pages = wdata->pages;
- do {
- nr_pages = find_get_pages_tag(mapping, &index,
- PAGECACHE_TAG_DIRTY,
- tofind, pages);
- found_pages += nr_pages;
- tofind -= nr_pages;
- pages += nr_pages;
- } while (nr_pages && tofind && index <= end);
-
if (found_pages == 0) {
kref_put(&wdata->refcount, cifs_writedata_release);
+ add_credits_and_wake_if(server, credits, 0);
break;
}
- nr_pages = 0;
- for (i = 0; i < found_pages; i++) {
- page = wdata->pages[i];
- /*
- * At this point we hold neither mapping->tree_lock nor
- * lock on the page itself: the page may be truncated or
- * invalidated (changing page->mapping to NULL), or even
- * swizzled back from swapper_space to tmpfs file
- * mapping
- */
-
- if (nr_pages == 0)
- lock_page(page);
- else if (!trylock_page(page))
- break;
-
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- break;
- }
-
- if (!wbc->range_cyclic && page->index > end) {
- done = true;
- unlock_page(page);
- break;
- }
-
- if (next && (page->index != next)) {
- /* Not next consecutive page */
- unlock_page(page);
- break;
- }
-
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
-
- if (PageWriteback(page) ||
- !clear_page_dirty_for_io(page)) {
- unlock_page(page);
- break;
- }
-
- /*
- * This actually clears the dirty bit in the radix tree.
- * See cifs_writepage() for more commentary.
- */
- set_page_writeback(page);
-
- if (page_offset(page) >= i_size_read(mapping->host)) {
- done = true;
- unlock_page(page);
- end_page_writeback(page);
- break;
- }
-
- wdata->pages[i] = page;
- next = page->index + 1;
- ++nr_pages;
- }
-
- /* reset index to refind any pages skipped */
- if (nr_pages == 0)
- index = wdata->pages[0]->index + 1;
-
- /* put any pages we aren't going to use */
- for (i = nr_pages; i < found_pages; i++) {
- page_cache_release(wdata->pages[i]);
- wdata->pages[i] = NULL;
- }
+ nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
+ end, &index, &next, &done);
/* nothing to write? */
if (nr_pages == 0) {
kref_put(&wdata->refcount, cifs_writedata_release);
+ add_credits_and_wake_if(server, credits, 0);
continue;
}
- wdata->sync_mode = wbc->sync_mode;
- wdata->nr_pages = nr_pages;
- wdata->offset = page_offset(wdata->pages[0]);
- wdata->pagesz = PAGE_CACHE_SIZE;
- wdata->tailsz =
- min(i_size_read(mapping->host) -
- page_offset(wdata->pages[nr_pages - 1]),
- (loff_t)PAGE_CACHE_SIZE);
- wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
- wdata->tailsz;
-
- do {
- if (wdata->cfile != NULL)
- cifsFileInfo_put(wdata->cfile);
- wdata->cfile = find_writable_file(CIFS_I(mapping->host),
- false);
- if (!wdata->cfile) {
- cifs_dbg(VFS, "No writable handles for inode\n");
- rc = -EBADF;
- break;
- }
- wdata->pid = wdata->cfile->pid;
- server = tlink_tcon(wdata->cfile->tlink)->ses->server;
- rc = server->ops->async_writev(wdata,
- cifs_writedata_release);
- } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
+ wdata->credits = credits;
- for (i = 0; i < nr_pages; ++i)
- unlock_page(wdata->pages[i]);
+ rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
/* send failure -- clean up the mess */
if (rc != 0) {
+ add_credits_and_wake_if(server, wdata->credits, 0);
for (i = 0; i < nr_pages; ++i) {
if (rc == -EAGAIN)
redirty_page_for_writepage(wbc,
@@ -2066,6 +2112,11 @@ retry:
}
kref_put(&wdata->refcount, cifs_writedata_release);
+ if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
+ index = saved_index;
+ continue;
+ }
+
wbc->nr_to_write -= nr_pages;
if (wbc->nr_to_write <= 0)
done = true;
@@ -2362,123 +2413,109 @@ cifs_uncached_writev_complete(struct work_struct *work)
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
-/* attempt to send write to server, retry on any -EAGAIN errors */
static int
-cifs_uncached_retry_writev(struct cifs_writedata *wdata)
+wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
+ size_t *len, unsigned long *num_pages)
{
- int rc;
- struct TCP_Server_Info *server;
+ size_t save_len, copied, bytes, cur_len = *len;
+ unsigned long i, nr_pages = *num_pages;
- server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+ save_len = cur_len;
+ for (i = 0; i < nr_pages; i++) {
+ bytes = min_t(const size_t, cur_len, PAGE_SIZE);
+ copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
+ cur_len -= copied;
+ /*
+ * If we didn't copy as much as we expected, then that
+ * may mean we trod into an unmapped area. Stop copying
+ * at that point. On the next pass through the big
+ * loop, we'll likely end up getting a zero-length
+ * write and bailing out of it.
+ */
+ if (copied < bytes)
+ break;
+ }
+ cur_len = save_len - cur_len;
+ *len = cur_len;
- do {
- if (wdata->cfile->invalidHandle) {
- rc = cifs_reopen_file(wdata->cfile, false);
- if (rc != 0)
- continue;
- }
- rc = server->ops->async_writev(wdata,
- cifs_uncached_writedata_release);
- } while (rc == -EAGAIN);
+ /*
+ * If we have no data to send, then that probably means that
+ * the copy above failed altogether. That's most likely because
+ * the address in the iovec was bogus. Return -EFAULT and let
+ * the caller free anything we allocated and bail out.
+ */
+ if (!cur_len)
+ return -EFAULT;
- return rc;
+ /*
+ * i + 1 now represents the number of pages we actually used in
+ * the copy phase above.
+ */
+ *num_pages = i + 1;
+ return 0;
}
-static ssize_t
-cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+static int
+cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
+ struct cifsFileInfo *open_file,
+ struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
{
- unsigned long nr_pages, i;
- size_t bytes, copied, len, cur_len;
- ssize_t total_written = 0;
- loff_t offset;
- struct cifsFileInfo *open_file;
- struct cifs_tcon *tcon;
- struct cifs_sb_info *cifs_sb;
- struct cifs_writedata *wdata, *tmp;
- struct list_head wdata_list;
- int rc;
+ int rc = 0;
+ size_t cur_len;
+ unsigned long nr_pages, num_pages, i;
+ struct cifs_writedata *wdata;
+ struct iov_iter saved_from;
+ loff_t saved_offset = offset;
pid_t pid;
-
- len = iov_iter_count(from);
- rc = generic_write_checks(file, poffset, &len, 0);
- if (rc)
- return rc;
-
- if (!len)
- return 0;
-
- iov_iter_truncate(from, len);
-
- INIT_LIST_HEAD(&wdata_list);
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
- open_file = file->private_data;
- tcon = tlink_tcon(open_file->tlink);
-
- if (!tcon->ses->server->ops->async_writev)
- return -ENOSYS;
-
- offset = *poffset;
+ struct TCP_Server_Info *server;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
else
pid = current->tgid;
+ server = tlink_tcon(open_file->tlink)->ses->server;
+ memcpy(&saved_from, from, sizeof(struct iov_iter));
+
do {
- size_t save_len;
+ unsigned int wsize, credits;
- nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+ &wsize, &credits);
+ if (rc)
+ break;
+
+ nr_pages = get_numpages(wsize, len, &cur_len);
wdata = cifs_writedata_alloc(nr_pages,
cifs_uncached_writev_complete);
if (!wdata) {
rc = -ENOMEM;
+ add_credits_and_wake_if(server, credits, 0);
break;
}
rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
if (rc) {
kfree(wdata);
+ add_credits_and_wake_if(server, credits, 0);
break;
}
- save_len = cur_len;
- for (i = 0; i < nr_pages; i++) {
- bytes = min_t(size_t, cur_len, PAGE_SIZE);
- copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
- from);
- cur_len -= copied;
- /*
- * If we didn't copy as much as we expected, then that
- * may mean we trod into an unmapped area. Stop copying
- * at that point. On the next pass through the big
- * loop, we'll likely end up getting a zero-length
- * write and bailing out of it.
- */
- if (copied < bytes)
- break;
- }
- cur_len = save_len - cur_len;
-
- /*
- * If we have no data to send, then that probably means that
- * the copy above failed altogether. That's most likely because
- * the address in the iovec was bogus. Set the rc to -EFAULT,
- * free anything we allocated and bail out.
- */
- if (!cur_len) {
+ num_pages = nr_pages;
+ rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
+ if (rc) {
for (i = 0; i < nr_pages; i++)
put_page(wdata->pages[i]);
kfree(wdata);
- rc = -EFAULT;
+ add_credits_and_wake_if(server, credits, 0);
break;
}
/*
- * i + 1 now represents the number of pages we actually used in
- * the copy phase above. Bring nr_pages down to that, and free
- * any pages that we didn't use.
+ * Bring nr_pages down to the number of pages we actually used,
+ * and free any pages that we didn't use.
*/
- for ( ; nr_pages > i + 1; nr_pages--)
+ for ( ; nr_pages > num_pages; nr_pages--)
put_page(wdata->pages[nr_pages - 1]);
wdata->sync_mode = WB_SYNC_ALL;
@@ -2489,18 +2526,69 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
wdata->bytes = cur_len;
wdata->pagesz = PAGE_SIZE;
wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
- rc = cifs_uncached_retry_writev(wdata);
+ wdata->credits = credits;
+
+ if (!wdata->cfile->invalidHandle ||
+ !cifs_reopen_file(wdata->cfile, false))
+ rc = server->ops->async_writev(wdata,
+ cifs_uncached_writedata_release);
if (rc) {
+ add_credits_and_wake_if(server, wdata->credits, 0);
kref_put(&wdata->refcount,
cifs_uncached_writedata_release);
+ if (rc == -EAGAIN) {
+ memcpy(from, &saved_from,
+ sizeof(struct iov_iter));
+ iov_iter_advance(from, offset - saved_offset);
+ continue;
+ }
break;
}
- list_add_tail(&wdata->list, &wdata_list);
+ list_add_tail(&wdata->list, wdata_list);
offset += cur_len;
len -= cur_len;
} while (len > 0);
+ return rc;
+}
+
+static ssize_t
+cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+{
+ size_t len;
+ ssize_t total_written = 0;
+ struct cifsFileInfo *open_file;
+ struct cifs_tcon *tcon;
+ struct cifs_sb_info *cifs_sb;
+ struct cifs_writedata *wdata, *tmp;
+ struct list_head wdata_list;
+ struct iov_iter saved_from;
+ int rc;
+
+ len = iov_iter_count(from);
+ rc = generic_write_checks(file, poffset, &len, 0);
+ if (rc)
+ return rc;
+
+ if (!len)
+ return 0;
+
+ iov_iter_truncate(from, len);
+
+ INIT_LIST_HEAD(&wdata_list);
+ cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ open_file = file->private_data;
+ tcon = tlink_tcon(open_file->tlink);
+
+ if (!tcon->ses->server->ops->async_writev)
+ return -ENOSYS;
+
+ memcpy(&saved_from, from, sizeof(struct iov_iter));
+
+ rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
+ &wdata_list);
+
/*
* If at least one write was successfully sent, then discard any rc
* value from the later writes. If the other write succeeds, then
@@ -2529,7 +2617,25 @@ restart_loop:
/* resend call if it's a retryable error */
if (rc == -EAGAIN) {
- rc = cifs_uncached_retry_writev(wdata);
+ struct list_head tmp_list;
+ struct iov_iter tmp_from;
+
+ INIT_LIST_HEAD(&tmp_list);
+ list_del_init(&wdata->list);
+
+ memcpy(&tmp_from, &saved_from,
+ sizeof(struct iov_iter));
+ iov_iter_advance(&tmp_from,
+ wdata->offset - *poffset);
+
+ rc = cifs_write_from_iter(wdata->offset,
+ wdata->bytes, &tmp_from,
+ open_file, cifs_sb, &tmp_list);
+
+ list_splice(&tmp_list, &wdata_list);
+
+ kref_put(&wdata->refcount,
+ cifs_uncached_writedata_release);
goto restart_loop;
}
}
@@ -2722,26 +2828,6 @@ cifs_uncached_readdata_release(struct kref *refcount)
cifs_readdata_release(refcount);
}
-static int
-cifs_retry_async_readv(struct cifs_readdata *rdata)
-{
- int rc;
- struct TCP_Server_Info *server;
-
- server = tlink_tcon(rdata->cfile->tlink)->ses->server;
-
- do {
- if (rdata->cfile->invalidHandle) {
- rc = cifs_reopen_file(rdata->cfile, true);
- if (rc != 0)
- continue;
- }
- rc = server->ops->async_readv(rdata);
- } while (rc == -EAGAIN);
-
- return rc;
-}
-
/**
* cifs_readdata_to_iov - copy data from pages in response to an iovec
* @rdata: the readdata response with list of pages holding data
@@ -2754,7 +2840,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
- size_t remaining = rdata->bytes;
+ size_t remaining = rdata->got_bytes;
unsigned int i;
for (i = 0; i < rdata->nr_pages; i++) {
@@ -2782,11 +2868,12 @@ static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
struct cifs_readdata *rdata, unsigned int len)
{
- int total_read = 0, result = 0;
+ int result = 0;
unsigned int i;
unsigned int nr_pages = rdata->nr_pages;
struct kvec iov;
+ rdata->got_bytes = 0;
rdata->tailsz = PAGE_SIZE;
for (i = 0; i < nr_pages; i++) {
struct page *page = rdata->pages[i];
@@ -2820,55 +2907,45 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
if (result < 0)
break;
- total_read += result;
+ rdata->got_bytes += result;
}
- return total_read > 0 ? total_read : result;
+ return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+ rdata->got_bytes : result;
}
-ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+static int
+cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
+ struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
{
- struct file *file = iocb->ki_filp;
- ssize_t rc;
- size_t len, cur_len;
- ssize_t total_read = 0;
- loff_t offset = iocb->ki_pos;
- unsigned int npages;
- struct cifs_sb_info *cifs_sb;
- struct cifs_tcon *tcon;
- struct cifsFileInfo *open_file;
- struct cifs_readdata *rdata, *tmp;
- struct list_head rdata_list;
+ struct cifs_readdata *rdata;
+ unsigned int npages, rsize, credits;
+ size_t cur_len;
+ int rc;
pid_t pid;
+ struct TCP_Server_Info *server;
- len = iov_iter_count(to);
- if (!len)
- return 0;
-
- INIT_LIST_HEAD(&rdata_list);
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
- open_file = file->private_data;
- tcon = tlink_tcon(open_file->tlink);
-
- if (!tcon->ses->server->ops->async_readv)
- return -ENOSYS;
+ server = tlink_tcon(open_file->tlink)->ses->server;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
else
pid = current->tgid;
- if ((file->f_flags & O_ACCMODE) == O_WRONLY)
- cifs_dbg(FYI, "attempting read on write only file instance\n");
-
do {
- cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+ &rsize, &credits);
+ if (rc)
+ break;
+
+ cur_len = min_t(const size_t, len, rsize);
npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
/* allocate a readdata struct */
rdata = cifs_readdata_alloc(npages,
cifs_uncached_readv_complete);
if (!rdata) {
+ add_credits_and_wake_if(server, credits, 0);
rc = -ENOMEM;
break;
}
@@ -2884,44 +2961,113 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
rdata->read_into_pages = cifs_uncached_read_into_pages;
+ rdata->credits = credits;
- rc = cifs_retry_async_readv(rdata);
+ if (!rdata->cfile->invalidHandle ||
+ !cifs_reopen_file(rdata->cfile, true))
+ rc = server->ops->async_readv(rdata);
error:
if (rc) {
+ add_credits_and_wake_if(server, rdata->credits, 0);
kref_put(&rdata->refcount,
cifs_uncached_readdata_release);
+ if (rc == -EAGAIN)
+ continue;
break;
}
- list_add_tail(&rdata->list, &rdata_list);
+ list_add_tail(&rdata->list, rdata_list);
offset += cur_len;
len -= cur_len;
} while (len > 0);
+ return rc;
+}
+
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ ssize_t rc;
+ size_t len;
+ ssize_t total_read = 0;
+ loff_t offset = iocb->ki_pos;
+ struct cifs_sb_info *cifs_sb;
+ struct cifs_tcon *tcon;
+ struct cifsFileInfo *open_file;
+ struct cifs_readdata *rdata, *tmp;
+ struct list_head rdata_list;
+
+ len = iov_iter_count(to);
+ if (!len)
+ return 0;
+
+ INIT_LIST_HEAD(&rdata_list);
+ cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ open_file = file->private_data;
+ tcon = tlink_tcon(open_file->tlink);
+
+ if (!tcon->ses->server->ops->async_readv)
+ return -ENOSYS;
+
+ if ((file->f_flags & O_ACCMODE) == O_WRONLY)
+ cifs_dbg(FYI, "attempting read on write only file instance\n");
+
+ rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
+
/* if at least one read request send succeeded, then reset rc */
if (!list_empty(&rdata_list))
rc = 0;
len = iov_iter_count(to);
/* the loop below should proceed in the order of increasing offsets */
+again:
list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
- again:
if (!rc) {
/* FIXME: freezable sleep too? */
rc = wait_for_completion_killable(&rdata->done);
if (rc)
rc = -EINTR;
- else if (rdata->result) {
- rc = rdata->result;
+ else if (rdata->result == -EAGAIN) {
/* resend call if it's a retryable error */
- if (rc == -EAGAIN) {
- rc = cifs_retry_async_readv(rdata);
- goto again;
+ struct list_head tmp_list;
+ unsigned int got_bytes = rdata->got_bytes;
+
+ list_del_init(&rdata->list);
+ INIT_LIST_HEAD(&tmp_list);
+
+ /*
+ * Got a part of data and then reconnect has
+ * happened -- fill the buffer and continue
+ * reading.
+ */
+ if (got_bytes && got_bytes < rdata->bytes) {
+ rc = cifs_readdata_to_iov(rdata, to);
+ if (rc) {
+ kref_put(&rdata->refcount,
+ cifs_uncached_readdata_release);
+ continue;
+ }
}
- } else {
+
+ rc = cifs_send_async_read(
+ rdata->offset + got_bytes,
+ rdata->bytes - got_bytes,
+ rdata->cfile, cifs_sb,
+ &tmp_list);
+
+ list_splice(&tmp_list, &rdata_list);
+
+ kref_put(&rdata->refcount,
+ cifs_uncached_readdata_release);
+ goto again;
+ } else if (rdata->result)
+ rc = rdata->result;
+ else
rc = cifs_readdata_to_iov(rdata, to);
- }
+ /* if there was a short read -- discard anything left */
+ if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
+ rc = -ENODATA;
}
list_del_init(&rdata->list);
kref_put(&rdata->refcount, cifs_uncached_readdata_release);
@@ -3030,18 +3176,19 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
for (total_read = 0, cur_offset = read_data; read_size > total_read;
total_read += bytes_read, cur_offset += bytes_read) {
- current_read_size = min_t(uint, read_size - total_read, rsize);
- /*
- * For windows me and 9x we do not want to request more than it
- * negotiated since it will refuse the read then.
- */
- if ((tcon->ses) && !(tcon->ses->capabilities &
+ do {
+ current_read_size = min_t(uint, read_size - total_read,
+ rsize);
+ /*
+ * For windows me and 9x we do not want to request more
+ * than it negotiated since it will refuse the read
+ * then.
+ */
+ if ((tcon->ses) && !(tcon->ses->capabilities &
tcon->ses->server->vals->cap_large_files)) {
- current_read_size = min_t(uint, current_read_size,
- CIFSMaxBufSize);
- }
- rc = -EAGAIN;
- while (rc == -EAGAIN) {
+ current_read_size = min_t(uint,
+ current_read_size, CIFSMaxBufSize);
+ }
if (open_file->invalidHandle) {
rc = cifs_reopen_file(open_file, true);
if (rc != 0)
@@ -3054,7 +3201,8 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
rc = server->ops->sync_read(xid, open_file, &io_parms,
&bytes_read, &cur_offset,
&buf_type);
- }
+ } while (rc == -EAGAIN);
+
if (rc || (bytes_read == 0)) {
if (total_read) {
break;
@@ -3133,25 +3281,30 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
static void
cifs_readv_complete(struct work_struct *work)
{
- unsigned int i;
+ unsigned int i, got_bytes;
struct cifs_readdata *rdata = container_of(work,
struct cifs_readdata, work);
+ got_bytes = rdata->got_bytes;
for (i = 0; i < rdata->nr_pages; i++) {
struct page *page = rdata->pages[i];
lru_cache_add_file(page);
- if (rdata->result == 0) {
+ if (rdata->result == 0 ||
+ (rdata->result == -EAGAIN && got_bytes)) {
flush_dcache_page(page);
SetPageUptodate(page);
}
unlock_page(page);
- if (rdata->result == 0)
+ if (rdata->result == 0 ||
+ (rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
+ got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
+
page_cache_release(page);
rdata->pages[i] = NULL;
}
@@ -3162,7 +3315,7 @@ static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
struct cifs_readdata *rdata, unsigned int len)
{
- int total_read = 0, result = 0;
+ int result = 0;
unsigned int i;
u64 eof;
pgoff_t eof_index;
@@ -3174,6 +3327,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
+ rdata->got_bytes = 0;
rdata->tailsz = PAGE_CACHE_SIZE;
for (i = 0; i < nr_pages; i++) {
struct page *page = rdata->pages[i];
@@ -3228,10 +3382,70 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
if (result < 0)
break;
- total_read += result;
+ rdata->got_bytes += result;
}
- return total_read > 0 ? total_read : result;
+ return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+ rdata->got_bytes : result;
+}
+
+static int
+readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
+ unsigned int rsize, struct list_head *tmplist,
+ unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
+{
+ struct page *page, *tpage;
+ unsigned int expected_index;
+ int rc;
+
+ INIT_LIST_HEAD(tmplist);
+
+ page = list_entry(page_list->prev, struct page, lru);
+
+ /*
+ * Lock the page and put it in the cache. Since no one else
+ * should have access to this page, we're safe to simply set
+ * PG_locked without checking it first.
+ */
+ __set_page_locked(page);
+ rc = add_to_page_cache_locked(page, mapping,
+ page->index, GFP_KERNEL);
+
+ /* give up if we can't stick it in the cache */
+ if (rc) {
+ __clear_page_locked(page);
+ return rc;
+ }
+
+ /* move first page to the tmplist */
+ *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ *bytes = PAGE_CACHE_SIZE;
+ *nr_pages = 1;
+ list_move_tail(&page->lru, tmplist);
+
+ /* now try and add more pages onto the request */
+ expected_index = page->index + 1;
+ list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
+ /* discontinuity ? */
+ if (page->index != expected_index)
+ break;
+
+ /* would this page push the read over the rsize? */
+ if (*bytes + PAGE_CACHE_SIZE > rsize)
+ break;
+
+ __set_page_locked(page);
+ if (add_to_page_cache_locked(page, mapping, page->index,
+ GFP_KERNEL)) {
+ __clear_page_locked(page);
+ break;
+ }
+ list_move_tail(&page->lru, tmplist);
+ (*bytes) += PAGE_CACHE_SIZE;
+ expected_index++;
+ (*nr_pages)++;
+ }
+ return rc;
}
static int cifs_readpages(struct file *file, struct address_space *mapping,
@@ -3241,19 +3455,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
struct list_head tmplist;
struct cifsFileInfo *open_file = file->private_data;
struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
- unsigned int rsize = cifs_sb->rsize;
+ struct TCP_Server_Info *server;
pid_t pid;
/*
- * Give up immediately if rsize is too small to read an entire page.
- * The VFS will fall back to readpage. We should never reach this
- * point however since we set ra_pages to 0 when the rsize is smaller
- * than a cache page.
- */
- if (unlikely(rsize < PAGE_CACHE_SIZE))
- return 0;
-
- /*
* Reads as many pages as possible from fscache. Returns -ENOBUFS
* immediately if the cookie is negative
*
@@ -3271,7 +3476,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
pid = current->tgid;
rc = 0;
- INIT_LIST_HEAD(&tmplist);
+ server = tlink_tcon(open_file->tlink)->ses->server;
cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
__func__, file, mapping, num_pages);
@@ -3288,58 +3493,35 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
* the rdata->pages, then we want them in increasing order.
*/
while (!list_empty(page_list)) {
- unsigned int i;
- unsigned int bytes = PAGE_CACHE_SIZE;
- unsigned int expected_index;
- unsigned int nr_pages = 1;
+ unsigned int i, nr_pages, bytes, rsize;
loff_t offset;
struct page *page, *tpage;
struct cifs_readdata *rdata;
+ unsigned credits;
- page = list_entry(page_list->prev, struct page, lru);
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+ &rsize, &credits);
+ if (rc)
+ break;
/*
- * Lock the page and put it in the cache. Since no one else
- * should have access to this page, we're safe to simply set
- * PG_locked without checking it first.
+ * Give up immediately if rsize is too small to read an entire
+ * page. The VFS will fall back to readpage. We should never
+ * reach this point however since we set ra_pages to 0 when the
+ * rsize is smaller than a cache page.
*/
- __set_page_locked(page);
- rc = add_to_page_cache_locked(page, mapping,
- page->index, GFP_KERNEL);
+ if (unlikely(rsize < PAGE_CACHE_SIZE)) {
+ add_credits_and_wake_if(server, credits, 0);
+ return 0;
+ }
- /* give up if we can't stick it in the cache */
+ rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
+ &nr_pages, &offset, &bytes);
if (rc) {
- __clear_page_locked(page);
+ add_credits_and_wake_if(server, credits, 0);
break;
}
- /* move first page to the tmplist */
- offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
- list_move_tail(&page->lru, &tmplist);
-
- /* now try and add more pages onto the request */
- expected_index = page->index + 1;
- list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
- /* discontinuity ? */
- if (page->index != expected_index)
- break;
-
- /* would this page push the read over the rsize? */
- if (bytes + PAGE_CACHE_SIZE > rsize)
- break;
-
- __set_page_locked(page);
- if (add_to_page_cache_locked(page, mapping,
- page->index, GFP_KERNEL)) {
- __clear_page_locked(page);
- break;
- }
- list_move_tail(&page->lru, &tmplist);
- bytes += PAGE_CACHE_SIZE;
- expected_index++;
- nr_pages++;
- }
-
rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
if (!rdata) {
/* best to give up if we're out of mem */
@@ -3350,6 +3532,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
page_cache_release(page);
}
rc = -ENOMEM;
+ add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -3360,21 +3543,32 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
rdata->pid = pid;
rdata->pagesz = PAGE_CACHE_SIZE;
rdata->read_into_pages = cifs_readpages_read_into_pages;
+ rdata->credits = credits;
list_for_each_entry_safe(page, tpage, &tmplist, lru) {
list_del(&page->lru);
rdata->pages[rdata->nr_pages++] = page;
}
- rc = cifs_retry_async_readv(rdata);
- if (rc != 0) {
+ if (!rdata->cfile->invalidHandle ||
+ !cifs_reopen_file(rdata->cfile, true))
+ rc = server->ops->async_readv(rdata);
+ if (rc) {
+ add_credits_and_wake_if(server, rdata->credits, 0);
for (i = 0; i < rdata->nr_pages; i++) {
page = rdata->pages[i];
lru_cache_add_file(page);
unlock_page(page);
page_cache_release(page);
+ if (rc == -EAGAIN)
+ list_add_tail(&page->lru, &tmplist);
}
kref_put(&rdata->refcount, cifs_readdata_release);
+ if (rc == -EAGAIN) {
+ /* Re-add pages to the page_list and retry */
+ list_splice(&tmplist, page_list);
+ continue;
+ }
break;
}
@@ -3618,13 +3812,6 @@ static int cifs_launder_page(struct page *page)
return rc;
}
-static int
-cifs_pending_writers_wait(void *unused)
-{
- schedule();
- return 0;
-}
-
void cifs_oplock_break(struct work_struct *work)
{
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3636,7 +3823,7 @@ void cifs_oplock_break(struct work_struct *work)
int rc = 0;
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
- cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
server->ops->downgrade_oplock(server, cinode,
test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a174605f6afa..41de3935caa0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1780,7 +1780,7 @@ cifs_invalidate_mapping(struct inode *inode)
* @word: long word containing the bit lock
*/
static int
-cifs_wait_bit_killable(void *word)
+cifs_wait_bit_killable(struct wait_bit_key *key)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
@@ -1794,8 +1794,8 @@ cifs_revalidate_mapping(struct inode *inode)
int rc;
unsigned long *flags = &CIFS_I(inode)->flags;
- rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
- TASK_KILLABLE);
+ rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
+ TASK_KILLABLE);
if (rc)
return rc;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 264ece71bdb2..68559fd557fb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -374,7 +374,7 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
oparms.create_options = create_options;
- oparms.disposition = FILE_OPEN;
+ oparms.disposition = FILE_CREATE;
oparms.path = path;
oparms.fid = &fid;
oparms.reconnect = false;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3b0c62e622da..81340c6253eb 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -226,6 +226,15 @@ cifs_small_buf_release(void *buf_to_free)
return;
}
+void
+free_rsp_buf(int resp_buftype, void *rsp)
+{
+ if (resp_buftype == CIFS_SMALL_BUFFER)
+ cifs_small_buf_release(rsp);
+ else if (resp_buftype == CIFS_LARGE_BUFFER)
+ cifs_buf_release(rsp);
+}
+
/* NB: MID can not be set if treeCon not passed in, in that
case it is responsbility of caller to set the mid */
void
@@ -414,7 +423,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
return true;
}
if (pSMBr->hdr.Status.CifsError) {
- cifs_dbg(FYI, "notify err 0x%d\n",
+ cifs_dbg(FYI, "notify err 0x%x\n",
pSMBr->hdr.Status.CifsError);
return true;
}
@@ -441,7 +450,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
if (pSMB->hdr.WordCount != 8)
return false;
- cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n",
+ cifs_dbg(FYI, "oplock type 0x%x level 0x%x\n",
pSMB->LockType, pSMB->OplockLevel);
if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
return false;
@@ -582,7 +591,7 @@ int cifs_get_writer(struct cifsInodeInfo *cinode)
start:
rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
- cifs_oplock_break_wait, TASK_KILLABLE);
+ TASK_KILLABLE);
if (rc)
return rc;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index e87387dbf39f..39ee32688eac 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -520,382 +520,559 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
}
}
-int
-CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *nls_cp)
+struct sess_data {
+ unsigned int xid;
+ struct cifs_ses *ses;
+ struct nls_table *nls_cp;
+ void (*func)(struct sess_data *);
+ int result;
+
+ /* we will send the SMB in three pieces:
+ * a fixed length beginning part, an optional
+ * SPNEGO blob (which can be zero length), and a
+ * last part which will include the strings
+ * and rest of bcc area. This allows us to avoid
+ * a large buffer 17K allocation
+ */
+ int buf0_type;
+ struct kvec iov[3];
+};
+
+static int
+sess_alloc_buffer(struct sess_data *sess_data, int wct)
{
- int rc = 0;
- int wct;
+ int rc;
+ struct cifs_ses *ses = sess_data->ses;
struct smb_hdr *smb_buf;
- char *bcc_ptr;
- char *str_area;
- SESSION_SETUP_ANDX *pSMB;
- __u32 capabilities;
- __u16 count;
- int resp_buf_type;
- struct kvec iov[3];
- enum securityEnum type;
- __u16 action, bytes_remaining;
- struct key *spnego_key = NULL;
- __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
- u16 blob_len;
- char *ntlmsspblob = NULL;
- if (ses == NULL) {
- WARN(1, "%s: ses == NULL!", __func__);
- return -EINVAL;
- }
+ rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
+ (void **)&smb_buf);
- type = select_sectype(ses->server, ses->sectype);
- cifs_dbg(FYI, "sess setup type %d\n", type);
- if (type == Unspecified) {
- cifs_dbg(VFS,
- "Unable to select appropriate authentication method!");
- return -EINVAL;
+ if (rc)
+ return rc;
+
+ sess_data->iov[0].iov_base = (char *)smb_buf;
+ sess_data->iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
+ /*
+ * This variable will be used to clear the buffer
+ * allocated above in case of any error in the calling function.
+ */
+ sess_data->buf0_type = CIFS_SMALL_BUFFER;
+
+ /* 2000 big enough to fit max user, domain, NOS name etc. */
+ sess_data->iov[2].iov_base = kmalloc(2000, GFP_KERNEL);
+ if (!sess_data->iov[2].iov_base) {
+ rc = -ENOMEM;
+ goto out_free_smb_buf;
}
- if (type == RawNTLMSSP) {
- /* if memory allocation is successful, caller of this function
- * frees it.
- */
- ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
- if (!ses->ntlmssp)
- return -ENOMEM;
- ses->ntlmssp->sesskey_per_smbsess = false;
+ return 0;
+
+out_free_smb_buf:
+ kfree(smb_buf);
+ sess_data->iov[0].iov_base = NULL;
+ sess_data->iov[0].iov_len = 0;
+ sess_data->buf0_type = CIFS_NO_BUFFER;
+ return rc;
+}
+
+static void
+sess_free_buffer(struct sess_data *sess_data)
+{
+ free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
+ sess_data->buf0_type = CIFS_NO_BUFFER;
+ kfree(sess_data->iov[2].iov_base);
+}
+
+static int
+sess_establish_session(struct sess_data *sess_data)
+{
+ struct cifs_ses *ses = sess_data->ses;
+
+ mutex_lock(&ses->server->srv_mutex);
+ if (!ses->server->session_estab) {
+ if (ses->server->sign) {
+ ses->server->session_key.response =
+ kmemdup(ses->auth_key.response,
+ ses->auth_key.len, GFP_KERNEL);
+ if (!ses->server->session_key.response) {
+ mutex_unlock(&ses->server->srv_mutex);
+ return -ENOMEM;
+ }
+ ses->server->session_key.len =
+ ses->auth_key.len;
+ }
+ ses->server->sequence_number = 0x2;
+ ses->server->session_estab = true;
}
+ mutex_unlock(&ses->server->srv_mutex);
-ssetup_ntlmssp_authenticate:
- if (phase == NtLmChallenge)
- phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
+ cifs_dbg(FYI, "CIFS session established successfully\n");
+ spin_lock(&GlobalMid_Lock);
+ ses->status = CifsGood;
+ ses->need_reconnect = false;
+ spin_unlock(&GlobalMid_Lock);
- if (type == LANMAN) {
-#ifndef CONFIG_CIFS_WEAK_PW_HASH
- /* LANMAN and plaintext are less secure and off by default.
- So we make this explicitly be turned on in kconfig (in the
- build) and turned on at runtime (changed from the default)
- in proc/fs/cifs or via mount parm. Unfortunately this is
- needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
- return -EOPNOTSUPP;
-#endif
- wct = 10; /* lanman 2 style sessionsetup */
- } else if ((type == NTLM) || (type == NTLMv2)) {
- /* For NTLMv2 failures eventually may need to retry NTLM */
- wct = 13; /* old style NTLM sessionsetup */
- } else /* same size: negotiate or auth, NTLMSSP or extended security */
- wct = 12;
+ return 0;
+}
- rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
- (void **)&smb_buf);
- if (rc)
- return rc;
+static int
+sess_sendreceive(struct sess_data *sess_data)
+{
+ int rc;
+ struct smb_hdr *smb_buf = (struct smb_hdr *) sess_data->iov[0].iov_base;
+ __u16 count;
- pSMB = (SESSION_SETUP_ANDX *)smb_buf;
+ count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len;
+ smb_buf->smb_buf_length =
+ cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
+ put_bcc(count, smb_buf);
+
+ rc = SendReceive2(sess_data->xid, sess_data->ses,
+ sess_data->iov, 3 /* num_iovecs */,
+ &sess_data->buf0_type,
+ CIFS_LOG_ERROR);
+
+ return rc;
+}
+/*
+ * LANMAN and plaintext are less secure and off by default.
+ * So we make this explicitly be turned on in kconfig (in the
+ * build) and turned on at runtime (changed from the default)
+ * in proc/fs/cifs or via mount parm. Unfortunately this is
+ * needed for old Win (e.g. Win95), some obscure NAS and OS/2
+ */
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+static void
+sess_auth_lanman(struct sess_data *sess_data)
+{
+ int rc = 0;
+ struct smb_hdr *smb_buf;
+ SESSION_SETUP_ANDX *pSMB;
+ char *bcc_ptr;
+ struct cifs_ses *ses = sess_data->ses;
+ char lnm_session_key[CIFS_AUTH_RESP_SIZE];
+ __u32 capabilities;
+ __u16 bytes_remaining;
+
+ /* lanman 2 style sessionsetup */
+ /* wct = 10 */
+ rc = sess_alloc_buffer(sess_data, 10);
+ if (rc)
+ goto out;
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ bcc_ptr = sess_data->iov[2].iov_base;
capabilities = cifs_ssetup_hdr(ses, pSMB);
- /* we will send the SMB in three pieces:
- a fixed length beginning part, an optional
- SPNEGO blob (which can be zero length), and a
- last part which will include the strings
- and rest of bcc area. This allows us to avoid
- a large buffer 17K allocation */
- iov[0].iov_base = (char *)pSMB;
- iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
-
- /* setting this here allows the code at the end of the function
- to free the request buffer if there's an error */
- resp_buf_type = CIFS_SMALL_BUFFER;
+ pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
- /* 2000 big enough to fit max user, domain, NOS name etc. */
- str_area = kmalloc(2000, GFP_KERNEL);
- if (str_area == NULL) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
- bcc_ptr = str_area;
+ /* no capabilities flags in old lanman negotiation */
+ pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- iov[1].iov_base = NULL;
- iov[1].iov_len = 0;
+ /* Calculate hash with password and copy into bcc_ptr.
+ * Encryption Key (stored as in cryptkey) gets used if the
+ * security mode bit in Negottiate Protocol response states
+ * to use challenge/response method (i.e. Password bit is 1).
+ */
+ rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
+ ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
+ true : false, lnm_session_key);
- if (type == LANMAN) {
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- char lnm_session_key[CIFS_AUTH_RESP_SIZE];
+ memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+
+ /*
+ * can not sign if LANMAN negotiated so no need
+ * to calculate signing key? but what if server
+ * changed to do higher than lanman dialect and
+ * we reconnected would we ever calc signing_key?
+ */
- pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
+ cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n");
+ /* Unicode not allowed for LANMAN dialects */
+ ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
- /* no capabilities flags in old lanman negotiation */
+ sess_data->iov[2].iov_len = (long) bcc_ptr -
+ (long) sess_data->iov[2].iov_base;
- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+ rc = sess_sendreceive(sess_data);
+ if (rc)
+ goto out;
- /* Calculate hash with password and copy into bcc_ptr.
- * Encryption Key (stored as in cryptkey) gets used if the
- * security mode bit in Negottiate Protocol response states
- * to use challenge/response method (i.e. Password bit is 1).
- */
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
- rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
- ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
- true : false, lnm_session_key);
+ /* lanman response has a word count of 3 */
+ if (smb_buf->WordCount != 3) {
+ rc = -EIO;
+ cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
+ goto out;
+ }
- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
+ cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
+
+ ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
+ cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
- /* can not sign if LANMAN negotiated so no need
- to calculate signing key? but what if server
- changed to do higher than lanman dialect and
- we reconnected would we ever calc signing_key? */
+ bytes_remaining = get_bcc(smb_buf);
+ bcc_ptr = pByteArea(smb_buf);
- cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n");
- /* Unicode not allowed for LANMAN dialects */
- ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
+ /* BB check if Unicode and decode strings */
+ if (bytes_remaining == 0) {
+ /* no string area to decode, do nothing */
+ } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
+ /* unicode string area must be word-aligned */
+ if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
+ ++bcc_ptr;
+ --bytes_remaining;
+ }
+ decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
+ } else {
+ decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
+ }
+
+ rc = sess_establish_session(sess_data);
+out:
+ sess_data->result = rc;
+ sess_data->func = NULL;
+ sess_free_buffer(sess_data);
+}
+
+#else
+
+static void
+sess_auth_lanman(struct sess_data *sess_data)
+{
+ sess_data->result = -EOPNOTSUPP;
+ sess_data->func = NULL;
+}
#endif
- } else if (type == NTLM) {
- pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
- pSMB->req_no_secext.CaseInsensitivePasswordLength =
+
+static void
+sess_auth_ntlm(struct sess_data *sess_data)
+{
+ int rc = 0;
+ struct smb_hdr *smb_buf;
+ SESSION_SETUP_ANDX *pSMB;
+ char *bcc_ptr;
+ struct cifs_ses *ses = sess_data->ses;
+ __u32 capabilities;
+ __u16 bytes_remaining;
+
+ /* old style NTLM sessionsetup */
+ /* wct = 13 */
+ rc = sess_alloc_buffer(sess_data, 13);
+ if (rc)
+ goto out;
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ bcc_ptr = sess_data->iov[2].iov_base;
+ capabilities = cifs_ssetup_hdr(ses, pSMB);
+
+ pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
+ pSMB->req_no_secext.CaseInsensitivePasswordLength =
cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- pSMB->req_no_secext.CaseSensitivePasswordLength =
+ pSMB->req_no_secext.CaseSensitivePasswordLength =
cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- /* calculate ntlm response and session key */
- rc = setup_ntlm_response(ses, nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLM authentication\n",
+ /* calculate ntlm response and session key */
+ rc = setup_ntlm_response(ses, sess_data->nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLM authentication\n",
rc);
- goto ssetup_exit;
- }
+ goto out;
+ }
- /* copy ntlm response */
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
-
- if (ses->capabilities & CAP_UNICODE) {
- /* unicode strings must be word aligned */
- if (iov[0].iov_len % 2) {
- *bcc_ptr = 0;
- bcc_ptr++;
- }
- unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
- } else
- ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
- } else if (type == NTLMv2) {
- pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
-
- /* LM2 password would be here if we supported it */
- pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
-
- /* calculate nlmv2 response and session key */
- rc = setup_ntlmv2_rsp(ses, nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n",
- rc);
- goto ssetup_exit;
+ /* copy ntlm response */
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+
+ if (ses->capabilities & CAP_UNICODE) {
+ /* unicode strings must be word aligned */
+ if (sess_data->iov[0].iov_len % 2) {
+ *bcc_ptr = 0;
+ bcc_ptr++;
}
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- ses->auth_key.len - CIFS_SESS_KEY_SIZE);
- bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
-
- /* set case sensitive password length after tilen may get
- * assigned, tilen is 0 otherwise.
- */
- pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
+ } else {
+ ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
+ }
- if (ses->capabilities & CAP_UNICODE) {
- if (iov[0].iov_len % 2) {
- *bcc_ptr = 0;
- bcc_ptr++;
- }
- unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
- } else
- ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
- } else if (type == Kerberos) {
-#ifdef CONFIG_CIFS_UPCALL
- struct cifs_spnego_msg *msg;
- spnego_key = cifs_get_spnego_key(ses);
- if (IS_ERR(spnego_key)) {
- rc = PTR_ERR(spnego_key);
- spnego_key = NULL;
- goto ssetup_exit;
- }
+ sess_data->iov[2].iov_len = (long) bcc_ptr -
+ (long) sess_data->iov[2].iov_base;
- msg = spnego_key->payload.data;
- /* check version field to make sure that cifs.upcall is
- sending us a response in an expected form */
- if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
- cifs_dbg(VFS, "incorrect version of cifs.upcall "
- "expected %d but got %d)",
- CIFS_SPNEGO_UPCALL_VERSION, msg->version);
- rc = -EKEYREJECTED;
- goto ssetup_exit;
- }
+ rc = sess_sendreceive(sess_data);
+ if (rc)
+ goto out;
- ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
- GFP_KERNEL);
- if (!ses->auth_key.response) {
- cifs_dbg(VFS,
- "Kerberos can't allocate (%u bytes) memory",
- msg->sesskey_len);
- rc = -ENOMEM;
- goto ssetup_exit;
- }
- ses->auth_key.len = msg->sesskey_len;
-
- pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
- capabilities |= CAP_EXTENDED_SECURITY;
- pSMB->req.Capabilities = cpu_to_le32(capabilities);
- iov[1].iov_base = msg->data + msg->sesskey_len;
- iov[1].iov_len = msg->secblob_len;
- pSMB->req.SecurityBlobLength = cpu_to_le16(iov[1].iov_len);
-
- if (ses->capabilities & CAP_UNICODE) {
- /* unicode strings must be word aligned */
- if ((iov[0].iov_len + iov[1].iov_len) % 2) {
- *bcc_ptr = 0;
- bcc_ptr++;
- }
- unicode_oslm_strings(&bcc_ptr, nls_cp);
- unicode_domain_string(&bcc_ptr, ses, nls_cp);
- } else
- /* BB: is this right? */
- ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
-#else /* ! CONFIG_CIFS_UPCALL */
- cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
- rc = -ENOSYS;
- goto ssetup_exit;
-#endif /* CONFIG_CIFS_UPCALL */
- } else if (type == RawNTLMSSP) {
- if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
- cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
- rc = -ENOSYS;
- goto ssetup_exit;
- }
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
- cifs_dbg(FYI, "ntlmssp session setup phase %d\n", phase);
- pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
- capabilities |= CAP_EXTENDED_SECURITY;
- pSMB->req.Capabilities |= cpu_to_le32(capabilities);
- switch(phase) {
- case NtLmNegotiate:
- build_ntlmssp_negotiate_blob(
- pSMB->req.SecurityBlob, ses);
- iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
- iov[1].iov_base = pSMB->req.SecurityBlob;
- pSMB->req.SecurityBlobLength =
- cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
- break;
- case NtLmAuthenticate:
- /*
- * 5 is an empirical value, large enough to hold
- * authenticate message plus max 10 of av paris,
- * domain, user, workstation names, flags, etc.
- */
- ntlmsspblob = kzalloc(
- 5*sizeof(struct _AUTHENTICATE_MESSAGE),
- GFP_KERNEL);
- if (!ntlmsspblob) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
+ if (smb_buf->WordCount != 3) {
+ rc = -EIO;
+ cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
+ goto out;
+ }
- rc = build_ntlmssp_auth_blob(ntlmsspblob,
- &blob_len, ses, nls_cp);
- if (rc)
- goto ssetup_exit;
- iov[1].iov_len = blob_len;
- iov[1].iov_base = ntlmsspblob;
- pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
- /*
- * Make sure that we tell the server that we are using
- * the uid that it just gave us back on the response
- * (challenge)
- */
- smb_buf->Uid = ses->Suid;
- break;
- default:
- cifs_dbg(VFS, "invalid phase %d\n", phase);
- rc = -ENOSYS;
- goto ssetup_exit;
+ if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
+ cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
+
+ ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
+ cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
+
+ bytes_remaining = get_bcc(smb_buf);
+ bcc_ptr = pByteArea(smb_buf);
+
+ /* BB check if Unicode and decode strings */
+ if (bytes_remaining == 0) {
+ /* no string area to decode, do nothing */
+ } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
+ /* unicode string area must be word-aligned */
+ if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
+ ++bcc_ptr;
+ --bytes_remaining;
}
- /* unicode strings must be word aligned */
- if ((iov[0].iov_len + iov[1].iov_len) % 2) {
+ decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
+ } else {
+ decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
+ }
+
+ rc = sess_establish_session(sess_data);
+out:
+ sess_data->result = rc;
+ sess_data->func = NULL;
+ sess_free_buffer(sess_data);
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+}
+
+static void
+sess_auth_ntlmv2(struct sess_data *sess_data)
+{
+ int rc = 0;
+ struct smb_hdr *smb_buf;
+ SESSION_SETUP_ANDX *pSMB;
+ char *bcc_ptr;
+ struct cifs_ses *ses = sess_data->ses;
+ __u32 capabilities;
+ __u16 bytes_remaining;
+
+ /* old style NTLM sessionsetup */
+ /* wct = 13 */
+ rc = sess_alloc_buffer(sess_data, 13);
+ if (rc)
+ goto out;
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ bcc_ptr = sess_data->iov[2].iov_base;
+ capabilities = cifs_ssetup_hdr(ses, pSMB);
+
+ pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
+
+ /* LM2 password would be here if we supported it */
+ pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
+
+ /* calculate nlmv2 response and session key */
+ rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
+ goto out;
+ }
+
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+
+ /* set case sensitive password length after tilen may get
+ * assigned, tilen is 0 otherwise.
+ */
+ pSMB->req_no_secext.CaseSensitivePasswordLength =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+
+ if (ses->capabilities & CAP_UNICODE) {
+ if (sess_data->iov[0].iov_len % 2) {
*bcc_ptr = 0;
bcc_ptr++;
}
- unicode_oslm_strings(&bcc_ptr, nls_cp);
+ unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
} else {
- cifs_dbg(VFS, "secType %d not supported!\n", type);
- rc = -ENOSYS;
- goto ssetup_exit;
+ ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
}
- iov[2].iov_base = str_area;
- iov[2].iov_len = (long) bcc_ptr - (long) str_area;
- count = iov[1].iov_len + iov[2].iov_len;
- smb_buf->smb_buf_length =
- cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
+ sess_data->iov[2].iov_len = (long) bcc_ptr -
+ (long) sess_data->iov[2].iov_base;
- put_bcc(count, smb_buf);
+ rc = sess_sendreceive(sess_data);
+ if (rc)
+ goto out;
- rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
- CIFS_LOG_ERROR);
- /* SMB request buf freed in SendReceive2 */
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
+
+ if (smb_buf->WordCount != 3) {
+ rc = -EIO;
+ cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
+ goto out;
+ }
+
+ if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
+ cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
+
+ ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
+ cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
- pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
- smb_buf = (struct smb_hdr *)iov[0].iov_base;
+ bytes_remaining = get_bcc(smb_buf);
+ bcc_ptr = pByteArea(smb_buf);
- if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) &&
- (smb_buf->Status.CifsError ==
- cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
- if (phase != NtLmNegotiate) {
- cifs_dbg(VFS, "Unexpected more processing error\n");
- goto ssetup_exit;
+ /* BB check if Unicode and decode strings */
+ if (bytes_remaining == 0) {
+ /* no string area to decode, do nothing */
+ } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
+ /* unicode string area must be word-aligned */
+ if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
+ ++bcc_ptr;
+ --bytes_remaining;
}
- /* NTLMSSP Negotiate sent now processing challenge (response) */
- phase = NtLmChallenge; /* process ntlmssp challenge */
- rc = 0; /* MORE_PROC rc is not an error here, but expected */
+ decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
+ } else {
+ decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
}
+
+ rc = sess_establish_session(sess_data);
+out:
+ sess_data->result = rc;
+ sess_data->func = NULL;
+ sess_free_buffer(sess_data);
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+}
+
+#ifdef CONFIG_CIFS_UPCALL
+static void
+sess_auth_kerberos(struct sess_data *sess_data)
+{
+ int rc = 0;
+ struct smb_hdr *smb_buf;
+ SESSION_SETUP_ANDX *pSMB;
+ char *bcc_ptr;
+ struct cifs_ses *ses = sess_data->ses;
+ __u32 capabilities;
+ __u16 bytes_remaining;
+ struct key *spnego_key = NULL;
+ struct cifs_spnego_msg *msg;
+ u16 blob_len;
+
+ /* extended security */
+ /* wct = 12 */
+ rc = sess_alloc_buffer(sess_data, 12);
if (rc)
- goto ssetup_exit;
+ goto out;
- if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ bcc_ptr = sess_data->iov[2].iov_base;
+ capabilities = cifs_ssetup_hdr(ses, pSMB);
+
+ spnego_key = cifs_get_spnego_key(ses);
+ if (IS_ERR(spnego_key)) {
+ rc = PTR_ERR(spnego_key);
+ spnego_key = NULL;
+ goto out;
+ }
+
+ msg = spnego_key->payload.data;
+ /*
+ * check version field to make sure that cifs.upcall is
+ * sending us a response in an expected form
+ */
+ if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+ cifs_dbg(VFS,
+ "incorrect version of cifs.upcall (expected %d but got %d)",
+ CIFS_SPNEGO_UPCALL_VERSION, msg->version);
+ rc = -EKEYREJECTED;
+ goto out_put_spnego_key;
+ }
+
+ ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
+ GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory",
+ msg->sesskey_len);
+ rc = -ENOMEM;
+ goto out_put_spnego_key;
+ }
+ ses->auth_key.len = msg->sesskey_len;
+
+ pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
+ capabilities |= CAP_EXTENDED_SECURITY;
+ pSMB->req.Capabilities = cpu_to_le32(capabilities);
+ sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
+ sess_data->iov[1].iov_len = msg->secblob_len;
+ pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len);
+
+ if (ses->capabilities & CAP_UNICODE) {
+ /* unicode strings must be word aligned */
+ if ((sess_data->iov[0].iov_len
+ + sess_data->iov[1].iov_len) % 2) {
+ *bcc_ptr = 0;
+ bcc_ptr++;
+ }
+ unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
+ unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp);
+ } else {
+ /* BB: is this right? */
+ ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
+ }
+
+ sess_data->iov[2].iov_len = (long) bcc_ptr -
+ (long) sess_data->iov[2].iov_base;
+
+ rc = sess_sendreceive(sess_data);
+ if (rc)
+ goto out_put_spnego_key;
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
+
+ if (smb_buf->WordCount != 4) {
rc = -EIO;
cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto ssetup_exit;
+ goto out_put_spnego_key;
}
- action = le16_to_cpu(pSMB->resp.Action);
- if (action & GUEST_LOGIN)
+
+ if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
+
ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
- /* response can have either 3 or 4 word count - Samba sends 3 */
- /* and lanman response is 3 */
+
bytes_remaining = get_bcc(smb_buf);
bcc_ptr = pByteArea(smb_buf);
- if (smb_buf->WordCount == 4) {
- blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
- if (blob_len > bytes_remaining) {
- cifs_dbg(VFS, "bad security blob length %d\n",
- blob_len);
- rc = -EINVAL;
- goto ssetup_exit;
- }
- if (phase == NtLmChallenge) {
- rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
- /* now goto beginning for ntlmssp authenticate phase */
- if (rc)
- goto ssetup_exit;
- }
- bcc_ptr += blob_len;
- bytes_remaining -= blob_len;
+ blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
+ if (blob_len > bytes_remaining) {
+ cifs_dbg(VFS, "bad security blob length %d\n",
+ blob_len);
+ rc = -EINVAL;
+ goto out_put_spnego_key;
}
+ bcc_ptr += blob_len;
+ bytes_remaining -= blob_len;
/* BB check if Unicode and decode strings */
if (bytes_remaining == 0) {
@@ -906,60 +1083,371 @@ ssetup_ntlmssp_authenticate:
++bcc_ptr;
--bytes_remaining;
}
- decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp);
+ decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
} else {
- decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp);
+ decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
}
-ssetup_exit:
- if (spnego_key) {
- key_invalidate(spnego_key);
- key_put(spnego_key);
+ rc = sess_establish_session(sess_data);
+out_put_spnego_key:
+ key_invalidate(spnego_key);
+ key_put(spnego_key);
+out:
+ sess_data->result = rc;
+ sess_data->func = NULL;
+ sess_free_buffer(sess_data);
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+}
+
+#else
+
+static void
+sess_auth_kerberos(struct sess_data *sess_data)
+{
+ cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
+ sess_data->result = -ENOSYS;
+ sess_data->func = NULL;
+}
+#endif /* ! CONFIG_CIFS_UPCALL */
+
+/*
+ * The required kvec buffers have to be allocated before calling this
+ * function.
+ */
+static int
+_sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
+{
+ struct smb_hdr *smb_buf;
+ SESSION_SETUP_ANDX *pSMB;
+ struct cifs_ses *ses = sess_data->ses;
+ __u32 capabilities;
+ char *bcc_ptr;
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)pSMB;
+
+ capabilities = cifs_ssetup_hdr(ses, pSMB);
+ if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
+ cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
+ return -ENOSYS;
}
- kfree(str_area);
- kfree(ntlmsspblob);
- ntlmsspblob = NULL;
- if (resp_buf_type == CIFS_SMALL_BUFFER) {
- cifs_dbg(FYI, "ssetup freeing small buf %p\n", iov[0].iov_base);
- cifs_small_buf_release(iov[0].iov_base);
- } else if (resp_buf_type == CIFS_LARGE_BUFFER)
- cifs_buf_release(iov[0].iov_base);
- /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
- if ((phase == NtLmChallenge) && (rc == 0))
- goto ssetup_ntlmssp_authenticate;
+ pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
+ capabilities |= CAP_EXTENDED_SECURITY;
+ pSMB->req.Capabilities |= cpu_to_le32(capabilities);
+
+ bcc_ptr = sess_data->iov[2].iov_base;
+ /* unicode strings must be word aligned */
+ if ((sess_data->iov[0].iov_len + sess_data->iov[1].iov_len) % 2) {
+ *bcc_ptr = 0;
+ bcc_ptr++;
+ }
+ unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
+
+ sess_data->iov[2].iov_len = (long) bcc_ptr -
+ (long) sess_data->iov[2].iov_base;
+
+ return 0;
+}
+
+static void
+sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data);
+
+static void
+sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
+{
+ int rc;
+ struct smb_hdr *smb_buf;
+ SESSION_SETUP_ANDX *pSMB;
+ struct cifs_ses *ses = sess_data->ses;
+ __u16 bytes_remaining;
+ char *bcc_ptr;
+ u16 blob_len;
+
+ cifs_dbg(FYI, "rawntlmssp session setup negotiate phase\n");
+
+ /*
+ * if memory allocation is successful, caller of this function
+ * frees it.
+ */
+ ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+ if (!ses->ntlmssp) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ ses->ntlmssp->sesskey_per_smbsess = false;
+
+ /* wct = 12 */
+ rc = sess_alloc_buffer(sess_data, 12);
+ if (rc)
+ goto out;
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+
+ /* Build security blob before we assemble the request */
+ build_ntlmssp_negotiate_blob(pSMB->req.SecurityBlob, ses);
+ sess_data->iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
+ sess_data->iov[1].iov_base = pSMB->req.SecurityBlob;
+ pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
+
+ rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
+ if (rc)
+ goto out;
+
+ rc = sess_sendreceive(sess_data);
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
+
+ /* If true, rc here is expected and not an error */
+ if (sess_data->buf0_type != CIFS_NO_BUFFER &&
+ smb_buf->Status.CifsError ==
+ cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))
+ rc = 0;
+
+ if (rc)
+ goto out;
+
+ cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
+
+ if (smb_buf->WordCount != 4) {
+ rc = -EIO;
+ cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
+ goto out;
+ }
+
+ ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
+ cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
+
+ bytes_remaining = get_bcc(smb_buf);
+ bcc_ptr = pByteArea(smb_buf);
+
+ blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
+ if (blob_len > bytes_remaining) {
+ cifs_dbg(VFS, "bad security blob length %d\n",
+ blob_len);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
+out:
+ sess_free_buffer(sess_data);
if (!rc) {
- mutex_lock(&ses->server->srv_mutex);
- if (!ses->server->session_estab) {
- if (ses->server->sign) {
- ses->server->session_key.response =
- kmemdup(ses->auth_key.response,
- ses->auth_key.len, GFP_KERNEL);
- if (!ses->server->session_key.response) {
- rc = -ENOMEM;
- mutex_unlock(&ses->server->srv_mutex);
- goto keycp_exit;
- }
- ses->server->session_key.len =
- ses->auth_key.len;
- }
- ses->server->sequence_number = 0x2;
- ses->server->session_estab = true;
- }
- mutex_unlock(&ses->server->srv_mutex);
+ sess_data->func = sess_auth_rawntlmssp_authenticate;
+ return;
+ }
+
+ /* Else error. Cleanup */
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ kfree(ses->ntlmssp);
+ ses->ntlmssp = NULL;
+
+ sess_data->func = NULL;
+ sess_data->result = rc;
+}
- cifs_dbg(FYI, "CIFS session established successfully\n");
- spin_lock(&GlobalMid_Lock);
- ses->status = CifsGood;
- ses->need_reconnect = false;
- spin_unlock(&GlobalMid_Lock);
+static void
+sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
+{
+ int rc;
+ struct smb_hdr *smb_buf;
+ SESSION_SETUP_ANDX *pSMB;
+ struct cifs_ses *ses = sess_data->ses;
+ __u16 bytes_remaining;
+ char *bcc_ptr;
+ char *ntlmsspblob = NULL;
+ u16 blob_len;
+
+ cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
+
+ /* wct = 12 */
+ rc = sess_alloc_buffer(sess_data, 12);
+ if (rc)
+ goto out;
+
+ /* Build security blob before we assemble the request */
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)pSMB;
+ /*
+ * 5 is an empirical value, large enough to hold
+ * authenticate message plus max 10 of av paris,
+ * domain, user, workstation names, flags, etc.
+ */
+ ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
+ GFP_KERNEL);
+ if (!ntlmsspblob) {
+ rc = -ENOMEM;
+ goto out;
}
-keycp_exit:
+ rc = build_ntlmssp_auth_blob(ntlmsspblob,
+ &blob_len, ses, sess_data->nls_cp);
+ if (rc)
+ goto out_free_ntlmsspblob;
+ sess_data->iov[1].iov_len = blob_len;
+ sess_data->iov[1].iov_base = ntlmsspblob;
+ pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
+ /*
+ * Make sure that we tell the server that we are using
+ * the uid that it just gave us back on the response
+ * (challenge)
+ */
+ smb_buf->Uid = ses->Suid;
+
+ rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
+ if (rc)
+ goto out_free_ntlmsspblob;
+
+ rc = sess_sendreceive(sess_data);
+ if (rc)
+ goto out_free_ntlmsspblob;
+
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
+ if (smb_buf->WordCount != 4) {
+ rc = -EIO;
+ cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
+ goto out_free_ntlmsspblob;
+ }
+
+ if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
+ cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
+
+ bytes_remaining = get_bcc(smb_buf);
+ bcc_ptr = pByteArea(smb_buf);
+ blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
+ if (blob_len > bytes_remaining) {
+ cifs_dbg(VFS, "bad security blob length %d\n",
+ blob_len);
+ rc = -EINVAL;
+ goto out_free_ntlmsspblob;
+ }
+ bcc_ptr += blob_len;
+ bytes_remaining -= blob_len;
+
+
+ /* BB check if Unicode and decode strings */
+ if (bytes_remaining == 0) {
+ /* no string area to decode, do nothing */
+ } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
+ /* unicode string area must be word-aligned */
+ if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
+ ++bcc_ptr;
+ --bytes_remaining;
+ }
+ decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
+ } else {
+ decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
+ sess_data->nls_cp);
+ }
+
+out_free_ntlmsspblob:
+ kfree(ntlmsspblob);
+out:
+ sess_free_buffer(sess_data);
+
+ if (!rc)
+ rc = sess_establish_session(sess_data);
+
+ /* Cleanup */
kfree(ses->auth_key.response);
ses->auth_key.response = NULL;
kfree(ses->ntlmssp);
+ ses->ntlmssp = NULL;
+
+ sess_data->func = NULL;
+ sess_data->result = rc;
+}
+
+static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
+{
+ int type;
+
+ type = select_sectype(ses->server, ses->sectype);
+ cifs_dbg(FYI, "sess setup type %d\n", type);
+ if (type == Unspecified) {
+ cifs_dbg(VFS,
+ "Unable to select appropriate authentication method!");
+ return -EINVAL;
+ }
+
+ switch (type) {
+ case LANMAN:
+ /* LANMAN and plaintext are less secure and off by default.
+ * So we make this explicitly be turned on in kconfig (in the
+ * build) and turned on at runtime (changed from the default)
+ * in proc/fs/cifs or via mount parm. Unfortunately this is
+ * needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+ sess_data->func = sess_auth_lanman;
+ break;
+#else
+ return -EOPNOTSUPP;
+#endif
+ case NTLM:
+ sess_data->func = sess_auth_ntlm;
+ break;
+ case NTLMv2:
+ sess_data->func = sess_auth_ntlmv2;
+ break;
+ case Kerberos:
+#ifdef CONFIG_CIFS_UPCALL
+ sess_data->func = sess_auth_kerberos;
+ break;
+#else
+ cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
+ return -ENOSYS;
+ break;
+#endif /* CONFIG_CIFS_UPCALL */
+ case RawNTLMSSP:
+ sess_data->func = sess_auth_rawntlmssp_negotiate;
+ break;
+ default:
+ cifs_dbg(VFS, "secType %d not supported!\n", type);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *nls_cp)
+{
+ int rc = 0;
+ struct sess_data *sess_data;
+
+ if (ses == NULL) {
+ WARN(1, "%s: ses == NULL!", __func__);
+ return -EINVAL;
+ }
+
+ sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL);
+ if (!sess_data)
+ return -ENOMEM;
+
+ rc = select_sec(ses, sess_data);
+ if (rc)
+ goto out;
+
+ sess_data->xid = xid;
+ sess_data->ses = ses;
+ sess_data->buf0_type = CIFS_NO_BUFFER;
+ sess_data->nls_cp = (struct nls_table *) nls_cp;
+
+ while (sess_data->func)
+ sess_data->func(sess_data);
+
+ /* Store result before we free sess_data */
+ rc = sess_data->result;
+out:
+ kfree(sess_data);
return rc;
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d1fdfa848703..5e8c22d6c7b9 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1009,6 +1009,12 @@ cifs_is_read_op(__u32 oplock)
return oplock == OPLOCK_READ;
}
+static unsigned int
+cifs_wp_retry_size(struct inode *inode)
+{
+ return CIFS_SB(inode->i_sb)->wsize;
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -1019,6 +1025,7 @@ struct smb_version_operations smb1_operations = {
.set_credits = cifs_set_credits,
.get_credits_field = cifs_get_credits_field,
.get_credits = cifs_get_credits,
+ .wait_mtu_credits = cifs_wait_mtu_credits,
.get_next_mid = cifs_get_next_mid,
.read_data_offset = cifs_read_data_offset,
.read_data_length = cifs_read_data_length,
@@ -1078,6 +1085,7 @@ struct smb_version_operations smb1_operations = {
.query_mf_symlink = cifs_query_mf_symlink,
.create_mf_symlink = cifs_create_mf_symlink,
.is_read_op = cifs_is_read_op,
+ .wp_retry_size = cifs_wp_retry_size,
#ifdef CONFIG_CIFS_XATTR
.query_all_EAs = CIFSSMBQAllEAs,
.set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 84c012a6aba0..0150182a4494 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -91,7 +91,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
case SMB2_OP_SET_EOF:
tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid,
fid.volatile_fid, current->tgid,
- (__le64 *)data);
+ (__le64 *)data, false);
break;
case SMB2_OP_SET_INFO:
tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid,
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 94bd4fbb13d3..e31a9dfdcd39 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"},
{STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"},
{STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"},
- {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"},
+ {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"},
{STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"},
{STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"},
{STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b8021fde987d..f2e6ac29a8d6 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -437,7 +437,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
continue;
cifs_dbg(FYI, "found in the open list\n");
- cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+ cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
le32_to_cpu(rsp->NewLeaseState));
server->ops->set_oplock_level(cinode, lease_state, 0, NULL);
@@ -467,7 +467,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
}
cifs_dbg(FYI, "found in the pending open list\n");
- cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+ cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
le32_to_cpu(rsp->NewLeaseState));
open->oplock = lease_state;
@@ -546,7 +546,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
return false;
}
- cifs_dbg(FYI, "oplock level 0x%d\n", rsp->OplockLevel);
+ cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel);
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 787844bde384..77f8aeb9c2fc 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -19,6 +19,7 @@
#include <linux/pagemap.h>
#include <linux/vfs.h>
+#include <linux/falloc.h>
#include "cifsglob.h"
#include "smb2pdu.h"
#include "smb2proto.h"
@@ -112,6 +113,53 @@ smb2_get_credits(struct mid_q_entry *mid)
return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest);
}
+static int
+smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
+ unsigned int *num, unsigned int *credits)
+{
+ int rc = 0;
+ unsigned int scredits;
+
+ spin_lock(&server->req_lock);
+ while (1) {
+ if (server->credits <= 0) {
+ spin_unlock(&server->req_lock);
+ cifs_num_waiters_inc(server);
+ rc = wait_event_killable(server->request_q,
+ has_credits(server, &server->credits));
+ cifs_num_waiters_dec(server);
+ if (rc)
+ return rc;
+ spin_lock(&server->req_lock);
+ } else {
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&server->req_lock);
+ return -ENOENT;
+ }
+
+ scredits = server->credits;
+ /* can deadlock with reopen */
+ if (scredits == 1) {
+ *num = SMB2_MAX_BUFFER_SIZE;
+ *credits = 0;
+ break;
+ }
+
+ /* leave one credit for a possible reopen */
+ scredits--;
+ *num = min_t(unsigned int, size,
+ scredits * SMB2_MAX_BUFFER_SIZE);
+
+ *credits = DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
+ server->credits -= *credits;
+ server->in_flight++;
+ break;
+ }
+ }
+ spin_unlock(&server->req_lock);
+ return rc;
+}
+
static __u64
smb2_get_next_mid(struct TCP_Server_Info *server)
{
@@ -182,8 +230,9 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified wsize, or default */
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
- /* set it to the maximum buffer size value we can send with 1 credit */
- wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
+
+ if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
+ wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
return wsize;
}
@@ -197,8 +246,9 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified rsize, or default */
rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
- /* set it to the maximum buffer size value we can send with 1 credit */
- rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
+
+ if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
+ rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
return rsize;
}
@@ -687,7 +737,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
{
__le64 eof = cpu_to_le64(size);
return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
- cfile->fid.volatile_fid, cfile->pid, &eof);
+ cfile->fid.volatile_fid, cfile->pid, &eof, false);
}
static int
@@ -1104,6 +1154,13 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch)
return le32_to_cpu(lc->lcontext.LeaseState);
}
+static unsigned int
+smb2_wp_retry_size(struct inode *inode)
+{
+ return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize,
+ SMB2_MAX_BUFFER_SIZE);
+}
+
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -1113,6 +1170,7 @@ struct smb_version_operations smb20_operations = {
.set_credits = smb2_set_credits,
.get_credits_field = smb2_get_credits_field,
.get_credits = smb2_get_credits,
+ .wait_mtu_credits = cifs_wait_mtu_credits,
.get_next_mid = smb2_get_next_mid,
.read_data_offset = smb2_read_data_offset,
.read_data_length = smb2_read_data_length,
@@ -1177,6 +1235,7 @@ struct smb_version_operations smb20_operations = {
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
+ .wp_retry_size = smb2_wp_retry_size,
};
struct smb_version_operations smb21_operations = {
@@ -1188,6 +1247,7 @@ struct smb_version_operations smb21_operations = {
.set_credits = smb2_set_credits,
.get_credits_field = smb2_get_credits_field,
.get_credits = smb2_get_credits,
+ .wait_mtu_credits = smb2_wait_mtu_credits,
.get_next_mid = smb2_get_next_mid,
.read_data_offset = smb2_read_data_offset,
.read_data_length = smb2_read_data_length,
@@ -1252,6 +1312,7 @@ struct smb_version_operations smb21_operations = {
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
+ .wp_retry_size = smb2_wp_retry_size,
};
struct smb_version_operations smb30_operations = {
@@ -1263,6 +1324,7 @@ struct smb_version_operations smb30_operations = {
.set_credits = smb2_set_credits,
.get_credits_field = smb2_get_credits_field,
.get_credits = smb2_get_credits,
+ .wait_mtu_credits = smb2_wait_mtu_credits,
.get_next_mid = smb2_get_next_mid,
.read_data_offset = smb2_read_data_offset,
.read_data_length = smb2_read_data_length,
@@ -1330,6 +1392,7 @@ struct smb_version_operations smb30_operations = {
.parse_lease_buf = smb3_parse_lease_buf,
.clone_range = smb2_clone_range,
.validate_negotiate = smb3_validate_negotiate,
+ .wp_retry_size = smb2_wp_retry_size,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b0b260dbb19d..42ebc1a8be6c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -108,7 +108,6 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
if (!tcon)
goto out;
- /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */
/* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
/* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
if ((tcon->ses) &&
@@ -245,10 +244,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
if (rc)
goto out;
atomic_inc(&tconInfoReconnectCount);
- /*
- * BB FIXME add code to check if wsize needs update due to negotiated
- * smb buffer size shrinking.
- */
out:
/*
* Check if handle based operation so we know whether we can continue
@@ -309,16 +304,6 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
return rc;
}
-static void
-free_rsp_buf(int resp_buftype, void *rsp)
-{
- if (resp_buftype == CIFS_SMALL_BUFFER)
- cifs_small_buf_release(rsp);
- else if (resp_buftype == CIFS_LARGE_BUFFER)
- cifs_buf_release(rsp);
-}
-
-
/*
*
* SMB2 Worker functions follow:
@@ -1738,12 +1723,18 @@ smb2_readv_callback(struct mid_q_entry *mid)
rc);
}
/* FIXME: should this be counted toward the initiating task? */
- task_io_account_read(rdata->bytes);
- cifs_stats_bytes_read(tcon, rdata->bytes);
+ task_io_account_read(rdata->got_bytes);
+ cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
rdata->result = -EAGAIN;
+ if (server->sign && rdata->got_bytes)
+ /* reset bytes number since we can not check a sign */
+ rdata->got_bytes = 0;
+ /* FIXME: should this be counted toward the initiating task? */
+ task_io_account_read(rdata->got_bytes);
+ cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
default:
if (rdata->result != -ENODATA)
@@ -1762,11 +1753,12 @@ smb2_readv_callback(struct mid_q_entry *mid)
int
smb2_async_readv(struct cifs_readdata *rdata)
{
- int rc;
+ int rc, flags = 0;
struct smb2_hdr *buf;
struct cifs_io_parms io_parms;
struct smb_rqst rqst = { .rq_iov = &rdata->iov,
.rq_nvec = 1 };
+ struct TCP_Server_Info *server;
cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
__func__, rdata->offset, rdata->bytes);
@@ -1777,18 +1769,41 @@ smb2_async_readv(struct cifs_readdata *rdata)
io_parms.persistent_fid = rdata->cfile->fid.persistent_fid;
io_parms.volatile_fid = rdata->cfile->fid.volatile_fid;
io_parms.pid = rdata->pid;
+
+ server = io_parms.tcon->ses->server;
+
rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0);
- if (rc)
+ if (rc) {
+ if (rc == -EAGAIN && rdata->credits) {
+ /* credits was reset by reconnect */
+ rdata->credits = 0;
+ /* reduce in_flight value since we won't send the req */
+ spin_lock(&server->req_lock);
+ server->in_flight--;
+ spin_unlock(&server->req_lock);
+ }
return rc;
+ }
buf = (struct smb2_hdr *)rdata->iov.iov_base;
/* 4 for rfc1002 length field */
rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4;
+ if (rdata->credits) {
+ buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
+ SMB2_MAX_BUFFER_SIZE));
+ spin_lock(&server->req_lock);
+ server->credits += rdata->credits -
+ le16_to_cpu(buf->CreditCharge);
+ spin_unlock(&server->req_lock);
+ wake_up(&server->request_q);
+ flags = CIFS_HAS_CREDITS;
+ }
+
kref_get(&rdata->refcount);
rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
cifs_readv_receive, smb2_readv_callback,
- rdata, 0);
+ rdata, flags);
if (rc) {
kref_put(&rdata->refcount, cifs_readdata_release);
cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
@@ -1906,15 +1921,25 @@ int
smb2_async_writev(struct cifs_writedata *wdata,
void (*release)(struct kref *kref))
{
- int rc = -EACCES;
+ int rc = -EACCES, flags = 0;
struct smb2_write_req *req = NULL;
struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
+ struct TCP_Server_Info *server = tcon->ses->server;
struct kvec iov;
struct smb_rqst rqst;
rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
- if (rc)
+ if (rc) {
+ if (rc == -EAGAIN && wdata->credits) {
+ /* credits was reset by reconnect */
+ wdata->credits = 0;
+ /* reduce in_flight value since we won't send the req */
+ spin_lock(&server->req_lock);
+ server->in_flight--;
+ spin_unlock(&server->req_lock);
+ }
goto async_writev_out;
+ }
req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
@@ -1947,9 +1972,20 @@ smb2_async_writev(struct cifs_writedata *wdata,
inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
+ if (wdata->credits) {
+ req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
+ SMB2_MAX_BUFFER_SIZE));
+ spin_lock(&server->req_lock);
+ server->credits += wdata->credits -
+ le16_to_cpu(req->hdr.CreditCharge);
+ spin_unlock(&server->req_lock);
+ wake_up(&server->request_q);
+ flags = CIFS_HAS_CREDITS;
+ }
+
kref_get(&wdata->refcount);
- rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
- smb2_writev_callback, wdata, 0);
+ rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, wdata,
+ flags);
if (rc) {
kref_put(&wdata->refcount, release);
@@ -2325,7 +2361,7 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
int
SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
- u64 volatile_fid, u32 pid, __le64 *eof)
+ u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc)
{
struct smb2_file_eof_info info;
void *data;
@@ -2336,8 +2372,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
data = &info;
size = sizeof(struct smb2_file_eof_info);
- return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid,
- FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
+ if (is_falloc)
+ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+ pid, FILE_ALLOCATION_INFORMATION, 1, &data, &size);
+ else
+ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+ pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
}
int
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 0ce48db20a65..67e8ce8055de 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -139,7 +139,7 @@ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
__le16 *target_file);
extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 pid,
- __le64 *eof);
+ __le64 *eof, bool is_fallocate);
extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
FILE_BASIC_INFO *buf);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 59c748ce872f..5111e7272db6 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -466,7 +466,12 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
static inline void
smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr)
{
+ unsigned int i, num = le16_to_cpu(hdr->CreditCharge);
+
hdr->MessageId = get_next_mid64(server);
+ /* skip message numbers according to CreditCharge field */
+ for (i = 1; i < num; i++)
+ get_next_mid(server);
}
static struct mid_q_entry *
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 18cd5650a5fc..9d087f4e7d4e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -448,6 +448,15 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout,
return wait_for_free_credits(server, timeout, val);
}
+int
+cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
+ unsigned int *num, unsigned int *credits)
+{
+ *num = size;
+ *credits = 0;
+ return 0;
+}
+
static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
struct mid_q_entry **ppmidQ)
{
@@ -531,20 +540,23 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
{
int rc, timeout, optype;
struct mid_q_entry *mid;
+ unsigned int credits = 0;
timeout = flags & CIFS_TIMEOUT_MASK;
optype = flags & CIFS_OP_MASK;
- rc = wait_for_free_request(server, timeout, optype);
- if (rc)
- return rc;
+ if ((flags & CIFS_HAS_CREDITS) == 0) {
+ rc = wait_for_free_request(server, timeout, optype);
+ if (rc)
+ return rc;
+ credits = 1;
+ }
mutex_lock(&server->srv_mutex);
mid = server->ops->setup_async_request(server, rqst);
if (IS_ERR(mid)) {
mutex_unlock(&server->srv_mutex);
- add_credits(server, 1, optype);
- wake_up(&server->request_q);
+ add_credits_and_wake_if(server, credits, optype);
return PTR_ERR(mid);
}
@@ -572,8 +584,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
return 0;
cifs_delete_mid(mid);
- add_credits(server, 1, optype);
- wake_up(&server->request_q);
+ add_credits_and_wake_if(server, credits, optype);
return rc;
}
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 1da168c61d35..278f8fdeb9ef 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -13,7 +13,7 @@
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/sched.h>
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2849f41e72a2..1326d38960db 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -13,7 +13,7 @@
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/coda.h>
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index cd8a63238b11..9c3dedc000d1 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -19,8 +19,7 @@
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/namei.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9e83b7790212..d244d743a232 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -18,7 +18,7 @@
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index fe3afb2de880..b945410bfcd5 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -21,9 +21,7 @@
#include <linux/vfs.h>
#include <linux/slab.h>
#include <linux/pid_namespace.h>
-
-#include <asm/uaccess.h>
-
+#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 3f5de96bbb58..4326d172fc27 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -16,7 +16,7 @@
#include <linux/string.h>
#include <linux/namei.h>
#include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 5c1e4242368b..822629126e89 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -40,7 +40,7 @@
#include <linux/pid_namespace.h>
#include <asm/io.h>
#include <asm/poll.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 21fcf8dcb9cd..5bb6e27298a4 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -27,7 +27,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/vfs.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e82289047272..afec6450450f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -59,7 +59,7 @@
#include <linux/gfp.h>
#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/hci.h>
+#include <net/bluetooth/hci_sock.h>
#include <net/bluetooth/rfcomm.h>
#include <linux/capi.h>
diff --git a/fs/coredump.c b/fs/coredump.c
index 0b2528fb640e..a93f7e6ea4cf 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -306,7 +306,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(nr < 0))
return nr;
- tsk->flags = PF_DUMPCORE;
+ tsk->flags |= PF_DUMPCORE;
if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
/*
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index ddcfe590b8a8..355c522f3585 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -11,6 +11,8 @@
* The actual compression is based on zlib, see the other files.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -21,7 +23,7 @@
#include <linux/vfs.h>
#include <linux/mutex.h>
#include <uapi/linux/cramfs_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "internal.h"
@@ -153,7 +155,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
static unsigned buffer_blocknr[READ_BUFFERS];
-static struct super_block * buffer_dev[READ_BUFFERS];
+static struct super_block *buffer_dev[READ_BUFFERS];
static int next_buffer;
/*
@@ -205,6 +207,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
for (i = 0; i < BLKS_PER_BUF; i++) {
struct page *page = pages[i];
+
if (page) {
wait_on_page_locked(page);
if (!PageUptodate(page)) {
@@ -223,6 +226,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
data = read_buffers[buffer];
for (i = 0; i < BLKS_PER_BUF; i++) {
struct page *page = pages[i];
+
if (page) {
memcpy(data, kmap(page), PAGE_CACHE_SIZE);
kunmap(page);
@@ -237,6 +241,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
kill_block_super(sb);
kfree(sbi);
}
@@ -277,7 +282,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
/* check for wrong endianness */
if (super.magic == CRAMFS_MAGIC_WEND) {
if (!silent)
- printk(KERN_ERR "cramfs: wrong endianness\n");
+ pr_err("wrong endianness\n");
return -EINVAL;
}
@@ -287,22 +292,22 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
mutex_unlock(&read_mutex);
if (super.magic != CRAMFS_MAGIC) {
if (super.magic == CRAMFS_MAGIC_WEND && !silent)
- printk(KERN_ERR "cramfs: wrong endianness\n");
+ pr_err("wrong endianness\n");
else if (!silent)
- printk(KERN_ERR "cramfs: wrong magic\n");
+ pr_err("wrong magic\n");
return -EINVAL;
}
}
/* get feature flags first */
if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
- printk(KERN_ERR "cramfs: unsupported filesystem features\n");
+ pr_err("unsupported filesystem features\n");
return -EINVAL;
}
/* Check that the root inode is in a sane state */
if (!S_ISDIR(super.root.mode)) {
- printk(KERN_ERR "cramfs: root is not a directory\n");
+ pr_err("root is not a directory\n");
return -EINVAL;
}
/* correct strange, hard-coded permissions of mkcramfs */
@@ -310,23 +315,23 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
root_offset = super.root.offset << 2;
if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
- sbi->size=super.size;
- sbi->blocks=super.fsid.blocks;
- sbi->files=super.fsid.files;
+ sbi->size = super.size;
+ sbi->blocks = super.fsid.blocks;
+ sbi->files = super.fsid.files;
} else {
- sbi->size=1<<28;
- sbi->blocks=0;
- sbi->files=0;
+ sbi->size = 1<<28;
+ sbi->blocks = 0;
+ sbi->files = 0;
}
- sbi->magic=super.magic;
- sbi->flags=super.flags;
+ sbi->magic = super.magic;
+ sbi->flags = super.flags;
if (root_offset == 0)
- printk(KERN_INFO "cramfs: empty filesystem");
+ pr_info("empty filesystem");
else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
((root_offset != sizeof(struct cramfs_super)) &&
(root_offset != 512 + sizeof(struct cramfs_super))))
{
- printk(KERN_ERR "cramfs: bad root offset %lu\n", root_offset);
+ pr_err("bad root offset %lu\n", root_offset);
return -EINVAL;
}
@@ -425,7 +430,7 @@ static int cramfs_readdir(struct file *file, struct dir_context *ctx)
/*
* Lookup and fill in the inode data..
*/
-static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
unsigned int offset = 0;
struct inode *inode = NULL;
@@ -483,7 +488,7 @@ out:
return NULL;
}
-static int cramfs_readpage(struct file *file, struct page * page)
+static int cramfs_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
u32 maxblock;
@@ -511,7 +516,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
if (compr_len == 0)
; /* hole */
else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
- pr_err("cramfs: bad compressed blocksize %u\n",
+ pr_err("bad compressed blocksize %u\n",
compr_len);
goto err;
} else {
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 1760c1b84d97..ec4f1d4fdad0 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -15,6 +15,8 @@
* then is used by multiple filesystems.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
@@ -37,7 +39,7 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
err = zlib_inflateReset(&stream);
if (err != Z_OK) {
- printk("zlib_inflateReset error %d\n", err);
+ pr_err("zlib_inflateReset error %d\n", err);
zlib_inflateEnd(&stream);
zlib_inflateInit(&stream);
}
@@ -48,8 +50,8 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
return stream.total_out;
err:
- printk("Error %d while decompressing!\n", err);
- printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
+ pr_err("Error %d while decompressing!\n", err);
+ pr_err("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
return -EIO;
}
@@ -57,7 +59,7 @@ int cramfs_uncompress_init(void)
{
if (!initialized++) {
stream.workspace = vmalloc(zlib_inflate_workspacesize());
- if ( !stream.workspace ) {
+ if (!stream.workspace) {
initialized = 0;
return -ENOMEM;
}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 63146295153b..76c08c2beb2f 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -451,7 +451,7 @@ static ssize_t read_file_bool(struct file *file, char __user *user_buf,
{
char buf[3];
u32 *val = file->private_data;
-
+
if (*val)
buf[0] = 'Y';
else
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8c41b52da358..1e3b99d3db0d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -66,7 +66,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
break;
}
}
- return inode;
+ return inode;
}
/* SMP-safe */
@@ -317,7 +317,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
goto exit;
/* If the parent is not specified, we create it in the root.
- * We need the root dentry to do this, which is in the super
+ * We need the root dentry to do this, which is in the super
* block. A pointer to that is in the struct vfsmount that we
* have around.
*/
@@ -330,7 +330,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
switch (mode & S_IFMT) {
case S_IFDIR:
error = debugfs_mkdir(parent->d_inode, dentry, mode);
-
+
break;
case S_IFLNK:
error = debugfs_link(parent->d_inode, dentry, mode,
@@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
*/
void debugfs_remove_recursive(struct dentry *dentry)
{
- struct dentry *child, *next, *parent;
+ struct dentry *child, *parent;
if (IS_ERR_OR_NULL(dentry))
return;
@@ -546,30 +546,49 @@ void debugfs_remove_recursive(struct dentry *dentry)
parent = dentry;
down:
mutex_lock(&parent->d_inode->i_mutex);
- list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
+ loop:
+ /*
+ * The parent->d_subdirs is protected by the d_lock. Outside that
+ * lock, the child can be unlinked and set to be freed which can
+ * use the d_u.d_child as the rcu head and corrupt this list.
+ */
+ spin_lock(&parent->d_lock);
+ list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
if (!debugfs_positive(child))
continue;
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
+ spin_unlock(&parent->d_lock);
mutex_unlock(&parent->d_inode->i_mutex);
parent = child;
goto down;
}
- up:
+
+ spin_unlock(&parent->d_lock);
+
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+
+ /*
+ * The parent->d_lock protects agaist child from unlinking
+ * from d_subdirs. When releasing the parent->d_lock we can
+ * no longer trust that the next pointer is valid.
+ * Restart the loop. We'll skip this one with the
+ * debugfs_positive() check.
+ */
+ goto loop;
}
+ spin_unlock(&parent->d_lock);
mutex_unlock(&parent->d_inode->i_mutex);
child = parent;
parent = parent->d_parent;
mutex_lock(&parent->d_inode->i_mutex);
- if (child != dentry) {
- next = list_next_entry(child, d_u.d_child);
- goto up;
- }
+ if (child != dentry)
+ /* go up */
+ goto loop;
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 98040ba388ac..17e39b047de5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -71,7 +71,6 @@ struct dio_submit {
been performed at the start of a
write */
int pages_in_io; /* approximate total IO pages */
- size_t size; /* total request size (doesn't change)*/
sector_t block_in_file; /* Current offset into the underlying
file in dio_block units. */
unsigned blocks_available; /* At block_in_file. changes */
@@ -198,9 +197,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
* L1 cache.
*/
static inline struct page *dio_get_page(struct dio *dio,
- struct dio_submit *sdio, size_t *from, size_t *to)
+ struct dio_submit *sdio)
{
- int n;
if (dio_pages_present(sdio) == 0) {
int ret;
@@ -209,10 +207,7 @@ static inline struct page *dio_get_page(struct dio *dio,
return ERR_PTR(ret);
BUG_ON(dio_pages_present(sdio) == 0);
}
- n = sdio->head++;
- *from = n ? 0 : sdio->from;
- *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
- return dio->pages[n];
+ return dio->pages[sdio->head];
}
/**
@@ -911,11 +906,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
while (sdio->block_in_file < sdio->final_block_in_request) {
struct page *page;
size_t from, to;
- page = dio_get_page(dio, sdio, &from, &to);
+
+ page = dio_get_page(dio, sdio);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto out;
}
+ from = sdio->head ? 0 : sdio->from;
+ to = (sdio->head == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
+ sdio->head++;
while (from < to) {
unsigned this_chunk_bytes; /* # of bytes mapped */
@@ -1104,7 +1103,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
unsigned blkbits = i_blkbits;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
- loff_t end = offset + iov_iter_count(iter);
+ size_t count = iov_iter_count(iter);
+ loff_t end = offset + count;
struct dio *dio;
struct dio_submit sdio = { 0, };
struct buffer_head map_bh = { 0, };
@@ -1287,10 +1287,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
*/
BUG_ON(retval == -EIOCBQUEUED);
if (dio->is_async && retval == 0 && dio->result &&
- ((rw == READ) || (dio->result == sdio.size)))
+ (rw == READ || dio->result == count))
retval = -EIOCBQUEUED;
-
- if (retval != -EIOCBQUEUED)
+ else
dio_await_completion(dio);
if (drop_refcount(dio) == 0) {
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 8d77ba7b1756..1323c568e362 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -718,16 +718,11 @@ static const struct file_operations waiters_fops = {
void dlm_delete_debug_file(struct dlm_ls *ls)
{
- if (ls->ls_debug_rsb_dentry)
- debugfs_remove(ls->ls_debug_rsb_dentry);
- if (ls->ls_debug_waiters_dentry)
- debugfs_remove(ls->ls_debug_waiters_dentry);
- if (ls->ls_debug_locks_dentry)
- debugfs_remove(ls->ls_debug_locks_dentry);
- if (ls->ls_debug_all_dentry)
- debugfs_remove(ls->ls_debug_all_dentry);
- if (ls->ls_debug_toss_dentry)
- debugfs_remove(ls->ls_debug_toss_dentry);
+ debugfs_remove(ls->ls_debug_rsb_dentry);
+ debugfs_remove(ls->ls_debug_waiters_dentry);
+ debugfs_remove(ls->ls_debug_locks_dentry);
+ debugfs_remove(ls->ls_debug_all_dentry);
+ debugfs_remove(ls->ls_debug_toss_dentry);
}
int dlm_create_debug_file(struct dlm_ls *ls)
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 356c044e2cd3..bbee8f063dfa 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -12,7 +12,8 @@
#include "efs.h"
-static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) {
+static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
+{
struct buffer_head *bh;
int slot, namelen;
@@ -40,10 +41,10 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) {
pr_err("%s(): invalid directory block\n", __func__);
brelse(bh);
- return(0);
+ return 0;
}
- for(slot = 0; slot < dirblock->slots; slot++) {
+ for (slot = 0; slot < dirblock->slots; slot++) {
dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot));
namelen = dirslot->namelen;
@@ -52,12 +53,12 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
if ((namelen == len) && (!memcmp(name, nameptr, len))) {
inodenum = be32_to_cpu(dirslot->inode);
brelse(bh);
- return(inodenum);
+ return inodenum;
}
}
brelse(bh);
}
- return(0);
+ return 0;
}
struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..a2b42a98c743 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
if (!mm)
goto err;
- err = init_new_context(current, mm);
- if (err)
- goto err;
-
err = __bprm_mm_init(bprm);
if (err)
goto err;
@@ -1216,7 +1212,7 @@ EXPORT_SYMBOL(install_exec_creds);
/*
* determine how safe it is to execute the proposed program
* - the caller must hold ->cred_guard_mutex to protect against
- * PTRACE_ATTACH
+ * PTRACE_ATTACH or seccomp thread-sync
*/
static void check_unsafe_exec(struct linux_binprm *bprm)
{
@@ -1234,7 +1230,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
* This isn't strictly necessary, but it makes it harder for LSMs to
* mess up.
*/
- if (current->no_new_privs)
+ if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
t = p;
@@ -1272,7 +1268,7 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !current->no_new_privs &&
+ !task_no_new_privs(current) &&
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 7f20f25c232c..84529b8a331b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -116,7 +116,7 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
pages_in_unit - i);
- __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL);
+ __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL);
if (unlikely(!__a1pa)) {
ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
num_a1pa);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0762d143e252..581ef40fbe90 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -194,7 +194,16 @@ static void ext4_init_block_bitmap(struct super_block *sb,
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
@@ -359,6 +368,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
{
ext4_fsblk_t blk;
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
if (buffer_verified(bh))
return;
@@ -369,6 +379,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
block_group, blk);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
@@ -376,6 +389,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
desc, bh))) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
@@ -623,7 +639,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
if (!(*errp) &&
ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
dquot_alloc_block_nofail(inode,
EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ef1bed66c14f..0bb3f9ea0832 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -571,6 +571,31 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
return 0;
}
+int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
+ int buf_size)
+{
+ struct ext4_dir_entry_2 *de;
+ int nlen, rlen;
+ unsigned int offset = 0;
+ char *top;
+
+ de = (struct ext4_dir_entry_2 *)buf;
+ top = buf + buf_size;
+ while ((char *) de < top) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh,
+ buf, buf_size, offset))
+ return -EIO;
+ nlen = EXT4_DIR_REC_LEN(de->name_len);
+ rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+ de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
+ offset += rlen;
+ }
+ if ((char *) de > top)
+ return -EIO;
+
+ return 0;
+}
+
const struct file_operations ext4_dir_operations = {
.llseek = ext4_dir_llseek,
.read = generic_read_dir,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7cc5a0e23688..5b19760b1de5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -591,7 +591,6 @@ enum {
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
-#define EXT4_FREE_BLOCKS_RESERVE 0x0040
/*
* ioctl commands
@@ -2029,6 +2028,8 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
return ext4_filetype_table[filetype];
}
+extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
+ void *buf, int buf_size);
/* fsync.c */
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
@@ -2144,8 +2145,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
-extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t first, ext4_lblk_t stop);
+extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
+ ext4_lblk_t start, ext4_lblk_t end);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2560,7 +2561,6 @@ extern const struct file_operations ext4_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* inline.c */
-extern int ext4_has_inline_data(struct inode *inode);
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
@@ -2626,6 +2626,12 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_convert_inline_data(struct inode *inode);
+static inline int ext4_has_inline_data(struct inode *inode)
+{
+ return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
+ EXT4_I(inode)->i_inline_off;
+}
+
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
extern const struct inode_operations ext4_special_inode_operations;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a0e6d0..76c2df382b7d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -161,6 +161,8 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
struct inode *inode, struct ext4_ext_path *path)
{
int err;
+
+ WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
if (path->p_bh) {
ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
/* path points to block */
@@ -1808,8 +1810,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
brelse(path[1].p_bh);
ext4_free_blocks(handle, inode, NULL, blk, 1,
- EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET |
- EXT4_FREE_BLOCKS_RESERVE);
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}
/*
@@ -3253,7 +3254,7 @@ out:
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
- ext4_ext_dirty(handle, inode, path + depth);
+ ext4_ext_dirty(handle, inode, path + path->p_depth);
return err;
}
@@ -5403,16 +5404,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
int ret;
/* Collapse range works only on fs block size aligned offsets. */
- if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
- len & (EXT4_BLOCK_SIZE(sb) - 1))
+ if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
+ len & (EXT4_CLUSTER_SIZE(sb) - 1))
return -EINVAL;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
- return -EOPNOTSUPP;
-
trace_ext4_collapse_range(inode, offset, len);
punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 3f5c188953a4..0b7e28e7eaa4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -966,10 +966,10 @@ retry:
continue;
}
- if (ei->i_es_lru_nr == 0 || ei == locked_ei)
+ if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
+ !write_trylock(&ei->i_es_lock))
continue;
- write_lock(&ei->i_es_lock);
shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
if (ei->i_es_lru_nr == 0)
list_del_init(&ei->i_es_lru);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8695f70af1ef..aca7b24a4432 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,10 +200,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct address_space *mapping = file->f_mapping;
-
- if (!mapping->a_ops->readpage)
- return -ENOEXEC;
file_accessed(file);
vma->vm_ops = &ext4_file_vm_ops;
return 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0ee59a6644e2..5b87fc36aab8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -71,6 +71,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct ext4_group_desc *gdp)
{
struct ext4_group_info *grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
J_ASSERT_BH(bh, buffer_locked(bh));
/* If checksum is bad mark all blocks and inodes use to prevent
@@ -78,7 +79,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return 0;
}
@@ -116,6 +126,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
struct buffer_head *bh = NULL;
ext4_fsblk_t bitmap_blk;
struct ext4_group_info *grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
@@ -185,6 +196,12 @@ verify:
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
"inode_bitmap = %llu", block_group, bitmap_blk);
grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, desc);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return NULL;
}
@@ -321,6 +338,12 @@ out:
fatal = err;
} else {
ext4_error(sb, "bit already cleared for inode %lu", ino);
+ if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
}
@@ -851,6 +874,13 @@ got:
goto out;
}
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, group_desc_bh);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
+
/* We may have to initialize the block bitmap if it isn't already */
if (ext4_has_group_desc_csum(sb) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -887,13 +917,6 @@ got:
}
}
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, group_desc_bh);
- if (err) {
- ext4_std_error(sb, err);
- goto out;
- }
-
/* Update the relevant bg descriptor fields */
if (ext4_has_group_desc_csum(sb)) {
int free;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 8a57e9fcd1b9..e75f840000a0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -389,7 +389,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
return 0;
failed:
for (; i >= 0; i--) {
- if (i != indirect_blks && branch[i].bh)
+ /*
+ * We want to ext4_forget() only freshly allocated indirect
+ * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
+ * buffer at branch[0].bh is indirect block / inode already
+ * existing before ext4_alloc_branch() was called.
+ */
+ if (i > 0 && i != indirect_blks && branch[i].bh)
ext4_forget(handle, 1, inode, branch[i].bh,
branch[i].bh->b_blocknr);
ext4_free_blocks(handle, inode, NULL, new_blocks[i],
@@ -1289,89 +1295,220 @@ do_indirects:
}
}
-static int free_hole_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *parent_bh, __le32 *i_data,
- int level, ext4_lblk_t first,
- ext4_lblk_t count, int max)
+/**
+ * ext4_ind_remove_space - remove space from the range
+ * @handle: JBD handle for this transaction
+ * @inode: inode we are dealing with
+ * @start: First block to remove
+ * @end: One block after the last block to remove (exclusive)
+ *
+ * Free the blocks in the defined range (end is exclusive endpoint of
+ * range). This is used by ext4_punch_hole().
+ */
+int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
+ ext4_lblk_t start, ext4_lblk_t end)
{
- struct buffer_head *bh = NULL;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ __le32 *i_data = ei->i_data;
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ret = 0;
- int i, inc;
- ext4_lblk_t offset;
- __le32 blk;
-
- inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
- for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
- if (offset >= count + first)
- break;
- if (*i_data == 0 || (offset + inc) <= first)
- continue;
- blk = *i_data;
- if (level > 0) {
- ext4_lblk_t first2;
- bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
- "Read failure");
- return -EIO;
+ ext4_lblk_t offsets[4], offsets2[4];
+ Indirect chain[4], chain2[4];
+ Indirect *partial, *partial2;
+ ext4_lblk_t max_block;
+ __le32 nr = 0, nr2 = 0;
+ int n = 0, n2 = 0;
+ unsigned blocksize = inode->i_sb->s_blocksize;
+
+ max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+ >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+ if (end >= max_block)
+ end = max_block;
+ if ((start >= end) || (start > max_block))
+ return 0;
+
+ n = ext4_block_to_path(inode, start, offsets, NULL);
+ n2 = ext4_block_to_path(inode, end, offsets2, NULL);
+
+ BUG_ON(n > n2);
+
+ if ((n == 1) && (n == n2)) {
+ /* We're punching only within direct block range */
+ ext4_free_data(handle, inode, NULL, i_data + offsets[0],
+ i_data + offsets2[0]);
+ return 0;
+ } else if (n2 > n) {
+ /*
+ * Start and end are on a different levels so we're going to
+ * free partial block at start, and partial block at end of
+ * the range. If there are some levels in between then
+ * do_indirects label will take care of that.
+ */
+
+ if (n == 1) {
+ /*
+ * Start is at the direct block level, free
+ * everything to the end of the level.
+ */
+ ext4_free_data(handle, inode, NULL, i_data + offsets[0],
+ i_data + EXT4_NDIR_BLOCKS);
+ goto end_range;
+ }
+
+
+ partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+ if (nr) {
+ if (partial == chain) {
+ /* Shared branch grows from the inode */
+ ext4_free_branches(handle, inode, NULL,
+ &nr, &nr+1, (chain+n-1) - partial);
+ *partial->p = 0;
+ } else {
+ /* Shared branch grows from an indirect block */
+ BUFFER_TRACE(partial->bh, "get_write_access");
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p,
+ partial->p+1, (chain+n-1) - partial);
}
- first2 = (first > offset) ? first - offset : 0;
- ret = free_hole_blocks(handle, inode, bh,
- (__le32 *)bh->b_data, level - 1,
- first2, count - offset,
- inode->i_sb->s_blocksize >> 2);
- if (ret) {
- brelse(bh);
- goto err;
+ }
+
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the start of the range
+ */
+ while (partial > chain) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ (__le32 *)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse(partial->bh);
+ partial--;
+ }
+
+end_range:
+ partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ if (nr2) {
+ if (partial2 == chain2) {
+ /*
+ * Remember, end is exclusive so here we're at
+ * the start of the next level we're not going
+ * to free. Everything was covered by the start
+ * of the range.
+ */
+ return 0;
+ } else {
+ /* Shared branch grows from an indirect block */
+ partial2--;
}
+ } else {
+ /*
+ * ext4_find_shared returns Indirect structure which
+ * points to the last element which should not be
+ * removed by truncate. But this is end of the range
+ * in punch_hole so we need to point to the next element
+ */
+ partial2->p++;
}
- if (level == 0 ||
- (bh && all_zeroes((__le32 *)bh->b_data,
- (__le32 *)bh->b_data + addr_per_block))) {
- ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
- *i_data = 0;
+
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the end of the range
+ */
+ while (partial2 > chain2) {
+ ext4_free_branches(handle, inode, partial2->bh,
+ (__le32 *)partial2->bh->b_data,
+ partial2->p,
+ (chain2+n2-1) - partial2);
+ BUFFER_TRACE(partial2->bh, "call brelse");
+ brelse(partial2->bh);
+ partial2--;
}
- brelse(bh);
- bh = NULL;
+ goto do_indirects;
}
-err:
- return ret;
-}
-
-int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t first, ext4_lblk_t stop)
-{
- int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int level, ret = 0;
- int num = EXT4_NDIR_BLOCKS;
- ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
- __le32 *i_data = EXT4_I(inode)->i_data;
-
- count = stop - first;
- for (level = 0; level < 4; level++, max *= addr_per_block) {
- if (first < max) {
- ret = free_hole_blocks(handle, inode, NULL, i_data,
- level, first, count, num);
- if (ret)
- goto err;
- if (count > max - first)
- count -= max - first;
- else
- break;
- first = 0;
- } else {
- first -= max;
+ /* Punch happened within the same level (n == n2) */
+ partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+ partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ /*
+ * ext4_find_shared returns Indirect structure which
+ * points to the last element which should not be
+ * removed by truncate. But this is end of the range
+ * in punch_hole so we need to point to the next element
+ */
+ partial2->p++;
+ while ((partial > chain) || (partial2 > chain2)) {
+ /* We're at the same block, so we're almost finished */
+ if ((partial->bh && partial2->bh) &&
+ (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
+ if ((partial > chain) && (partial2 > chain2)) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ partial2->p,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse(partial->bh);
+ BUFFER_TRACE(partial2->bh, "call brelse");
+ brelse(partial2->bh);
+ }
+ return 0;
}
- i_data += num;
- if (level == 0) {
- num = 1;
- max = 1;
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the start of the range
+ */
+ if (partial > chain) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ (__le32 *)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse(partial->bh);
+ partial--;
+ }
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the end of the range
+ */
+ if (partial2 > chain2) {
+ ext4_free_branches(handle, inode, partial2->bh,
+ (__le32 *)partial2->bh->b_data,
+ partial2->p,
+ (chain2+n-1) - partial2);
+ BUFFER_TRACE(partial2->bh, "call brelse");
+ brelse(partial2->bh);
+ partial2--;
}
}
-err:
- return ret;
+do_indirects:
+ /* Kill the remaining (whole) subtrees */
+ switch (offsets[0]) {
+ default:
+ if (++n >= n2)
+ return 0;
+ nr = i_data[EXT4_IND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+ i_data[EXT4_IND_BLOCK] = 0;
+ }
+ case EXT4_IND_BLOCK:
+ if (++n >= n2)
+ return 0;
+ nr = i_data[EXT4_DIND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+ i_data[EXT4_DIND_BLOCK] = 0;
+ }
+ case EXT4_DIND_BLOCK:
+ if (++n >= n2)
+ return 0;
+ nr = i_data[EXT4_TIND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+ i_data[EXT4_TIND_BLOCK] = 0;
+ }
+ case EXT4_TIND_BLOCK:
+ ;
+ }
+ return 0;
}
-
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 645205d8ada6..bea662bd0ca6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -120,12 +120,6 @@ int ext4_get_max_inline_size(struct inode *inode)
return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}
-int ext4_has_inline_data(struct inode *inode)
-{
- return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
- EXT4_I(inode)->i_inline_off;
-}
-
/*
* this function does not take xattr_sem, which is OK because it is
* currently only used in a code path coming form ext4_iget, before
@@ -1178,6 +1172,18 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
if (error < 0)
goto out;
+ /*
+ * Make sure the inline directory entries pass checks before we try to
+ * convert them, so that we avoid touching stuff that needs fsck.
+ */
+ if (S_ISDIR(inode->i_mode)) {
+ error = ext4_check_all_de(inode, iloc->bh,
+ buf + EXT4_INLINE_DOTDOT_SIZE,
+ inline_size - EXT4_INLINE_DOTDOT_SIZE);
+ if (error)
+ goto out;
+ }
+
error = ext4_destroy_inline_data_nolock(handle, inode);
if (error)
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8a064734e6eb..367a60c07cf0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -325,18 +325,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
#endif
/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate a block located at @lblock
- */
-static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
-{
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- return ext4_ext_calc_metadata_amount(inode, lblock);
-
- return ext4_ind_calc_metadata_amount(inode, lblock);
-}
-
-/*
* Called with i_data_sem down, which is important since we can call
* ext4_discard_preallocations() from here.
*/
@@ -357,35 +345,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
used = ei->i_reserved_data_blocks;
}
- if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
- ext4_warning(inode->i_sb, "ino %lu, allocated %d "
- "with only %d reserved metadata blocks "
- "(releasing %d blocks with reserved %d data blocks)",
- inode->i_ino, ei->i_allocated_meta_blocks,
- ei->i_reserved_meta_blocks, used,
- ei->i_reserved_data_blocks);
- WARN_ON(1);
- ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
- }
-
/* Update per-inode reservations */
ei->i_reserved_data_blocks -= used;
- ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- used + ei->i_allocated_meta_blocks);
- ei->i_allocated_meta_blocks = 0;
+ percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
- if (ei->i_reserved_data_blocks == 0) {
- /*
- * We can release all of the reserved metadata blocks
- * only when we have written all of the delayed
- * allocation blocks.
- */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- ei->i_reserved_meta_blocks);
- ei->i_reserved_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- }
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
/* Update quota subsystem for data blocks */
@@ -1222,49 +1185,6 @@ static int ext4_journalled_write_end(struct file *file,
}
/*
- * Reserve a metadata for a single block located at lblock
- */
-static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned int md_needed;
- ext4_lblk_t save_last_lblock;
- int save_len;
-
- /*
- * recalculate the amount of metadata blocks to reserve
- * in order to allocate nrblocks
- * worse case is one extent per block
- */
- spin_lock(&ei->i_block_reservation_lock);
- /*
- * ext4_calc_metadata_amount() has side effects, which we have
- * to be prepared undo if we fail to claim space.
- */
- save_len = ei->i_da_metadata_calc_len;
- save_last_lblock = ei->i_da_metadata_calc_last_lblock;
- md_needed = EXT4_NUM_B2C(sbi,
- ext4_calc_metadata_amount(inode, lblock));
- trace_ext4_da_reserve_space(inode, md_needed);
-
- /*
- * We do still charge estimated metadata to the sb though;
- * we cannot afford to run out of free blocks.
- */
- if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
- ei->i_da_metadata_calc_len = save_len;
- ei->i_da_metadata_calc_last_lblock = save_last_lblock;
- spin_unlock(&ei->i_block_reservation_lock);
- return -ENOSPC;
- }
- ei->i_reserved_meta_blocks += md_needed;
- spin_unlock(&ei->i_block_reservation_lock);
-
- return 0; /* success */
-}
-
-/*
* Reserve a single cluster located at lblock
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1273,8 +1193,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
int ret;
- ext4_lblk_t save_last_lblock;
- int save_len;
/*
* We will charge metadata quota at writeout time; this saves
@@ -1295,25 +1213,15 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* ext4_calc_metadata_amount() has side effects, which we have
* to be prepared undo if we fail to claim space.
*/
- save_len = ei->i_da_metadata_calc_len;
- save_last_lblock = ei->i_da_metadata_calc_last_lblock;
- md_needed = EXT4_NUM_B2C(sbi,
- ext4_calc_metadata_amount(inode, lblock));
- trace_ext4_da_reserve_space(inode, md_needed);
+ md_needed = 0;
+ trace_ext4_da_reserve_space(inode, 0);
- /*
- * We do still charge estimated metadata to the sb though;
- * we cannot afford to run out of free blocks.
- */
- if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
- ei->i_da_metadata_calc_len = save_len;
- ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+ if (ext4_claim_free_clusters(sbi, 1, 0)) {
spin_unlock(&ei->i_block_reservation_lock);
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
ei->i_reserved_data_blocks++;
- ei->i_reserved_meta_blocks += md_needed;
spin_unlock(&ei->i_block_reservation_lock);
return 0; /* success */
@@ -1346,20 +1254,6 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
}
ei->i_reserved_data_blocks -= to_free;
- if (ei->i_reserved_data_blocks == 0) {
- /*
- * We can release all of the reserved metadata blocks
- * only when we have written all of the delayed
- * allocation blocks.
- * Note that in case of bigalloc, i_reserved_meta_blocks,
- * i_reserved_data_blocks, etc. refer to number of clusters.
- */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- ei->i_reserved_meta_blocks);
- ei->i_reserved_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- }
-
/* update fs dirty data blocks counter */
percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
@@ -1500,10 +1394,6 @@ static void ext4_print_free_blocks(struct inode *inode)
ext4_msg(sb, KERN_CRIT, "Block reservation details");
ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
ei->i_reserved_data_blocks);
- ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
- ei->i_reserved_meta_blocks);
- ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
- ei->i_allocated_meta_blocks);
return;
}
@@ -1620,13 +1510,6 @@ add_delayed:
retval = ret;
goto out_unlock;
}
- } else {
- ret = ext4_da_reserve_metadata(inode, iblock);
- if (ret) {
- /* not enough space to reserve */
- retval = ret;
- goto out_unlock;
- }
}
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2843,8 +2726,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
{
trace_ext4_alloc_da_blocks(inode);
- if (!EXT4_I(inode)->i_reserved_data_blocks &&
- !EXT4_I(inode)->i_reserved_meta_blocks)
+ if (!EXT4_I(inode)->i_reserved_data_blocks)
return 0;
/*
@@ -3624,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
ret = ext4_ext_remove_space(inode, first_block,
stop_block - 1);
else
- ret = ext4_free_hole_blocks(handle, inode, first_block,
+ ret = ext4_ind_remove_space(handle, inode, first_block,
stop_block);
up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 59e31622cc6e..956027711faf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -722,6 +722,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
void *buddy, void *bitmap, ext4_group_t group)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
ext4_grpblk_t i = 0;
ext4_grpblk_t first;
@@ -751,14 +752,17 @@ void ext4_mb_generate_buddy(struct super_block *sb,
if (free != grp->bb_free) {
ext4_grp_locked_error(sb, group, 0, 0,
- "%u clusters in bitmap, %u in gd; "
- "block bitmap corrupt.",
+ "block bitmap and bg descriptor "
+ "inconsistent: %u vs %u free clusters",
free, grp->bb_free);
/*
* If we intend to continue, we consider group descriptor
* corrupt and update bb_free using bitmap value
*/
grp->bb_free = free;
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
}
mb_set_largest_free_order(sb, grp);
@@ -1431,6 +1435,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
if (unlikely(block != -1)) {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t blocknr;
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1441,6 +1446,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
"freeing already freed block "
"(bit %u); block bitmap corrupt.",
block);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ e4b->bd_info->bb_free);
/* Mark the block group as corrupt. */
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
&e4b->bd_info->bb_state);
@@ -3067,8 +3075,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
(23 - bsbits)) << 23;
size = 8 * 1024 * 1024;
} else {
- start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
- size = ac->ac_o_ex.fe_len << bsbits;
+ start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
+ size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
+ ac->ac_o_ex.fe_len) << bsbits;
}
size = size >> bsbits;
start = start_off >> bsbits;
@@ -3208,8 +3217,27 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
struct ext4_prealloc_space *pa = ac->ac_pa;
+ struct ext4_buddy e4b;
+ int err;
- if (pa && pa->pa_type == MB_INODE_PA)
+ if (pa == NULL) {
+ err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
+ if (err) {
+ /*
+ * This should never happen since we pin the
+ * pages in the ext4_allocation_context so
+ * ext4_mb_load_buddy() should never fail.
+ */
+ WARN(1, "mb_load_buddy failed (%d)", err);
+ return;
+ }
+ ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
+ ac->ac_f_ex.fe_len);
+ ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ return;
+ }
+ if (pa->pa_type == MB_INODE_PA)
pa->pa_free += ac->ac_b_ex.fe_len;
}
@@ -4619,7 +4647,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *gd_bh;
ext4_group_t block_group;
struct ext4_sb_info *sbi;
- struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_buddy e4b;
unsigned int count_clusters;
int err = 0;
@@ -4830,19 +4857,7 @@ do_more:
&sbi->s_flex_groups[flex_group].free_clusters);
}
- if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) {
- percpu_counter_add(&sbi->s_dirtyclusters_counter,
- count_clusters);
- spin_lock(&ei->i_block_reservation_lock);
- if (flags & EXT4_FREE_BLOCKS_METADATA)
- ei->i_reserved_meta_blocks += count_clusters;
- else
- ei->i_reserved_data_blocks += count_clusters;
- spin_unlock(&ei->i_block_reservation_lock);
- if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
- dquot_reclaim_block(inode,
- EXT4_C2B(sbi, count_clusters));
- } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+ if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index ec092437d3e0..d3567f27bae7 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,6 +39,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
newext.ee_block = cpu_to_le32(lb->first_block);
newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
ext4_ext_store_pblock(&newext, lb->first_pblock);
+ /* Locking only for convinience since we are operating on temp inode */
+ down_write(&EXT4_I(inode)->i_data_sem);
path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) {
@@ -61,7 +63,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
*/
if (needed && ext4_handle_has_enough_credits(handle,
EXT4_RESERVE_TRANS_BLOCKS)) {
+ up_write((&EXT4_I(inode)->i_data_sem));
retval = ext4_journal_restart(handle, needed);
+ down_write((&EXT4_I(inode)->i_data_sem));
if (retval)
goto err_out;
} else if (needed) {
@@ -70,13 +74,16 @@ static int finish_range(handle_t *handle, struct inode *inode,
/*
* IF not able to extend the journal restart the journal
*/
+ up_write((&EXT4_I(inode)->i_data_sem));
retval = ext4_journal_restart(handle, needed);
+ down_write((&EXT4_I(inode)->i_data_sem));
if (retval)
goto err_out;
}
}
retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
err_out:
+ up_write((&EXT4_I(inode)->i_data_sem));
if (path) {
ext4_ext_drop_refs(path);
kfree(path);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2484c7ec6a72..671a74b14fd7 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1013,10 +1013,11 @@ data_copy:
*err = -EBUSY;
goto unlock_pages;
}
-
+ ext4_double_down_write_data_sem(orig_inode, donor_inode);
replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
orig_blk_offset,
block_len_in_page, err);
+ ext4_double_up_write_data_sem(orig_inode, donor_inode);
if (*err) {
if (replaced_count) {
block_len_in_page = replaced_count;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b9b9aabfb4d2..32b43ad154b9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1525,8 +1525,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
sbi->s_commit_interval = HZ * arg;
} else if (token == Opt_max_batch_time) {
- if (arg == 0)
- arg = EXT4_DEF_MAX_BATCH_TIME;
sbi->s_max_batch_time = arg;
} else if (token == Opt_min_batch_time) {
sbi->s_min_batch_time = arg;
@@ -2144,10 +2142,6 @@ static int ext4_check_descriptors(struct super_block *sb,
}
if (NULL != first_not_zeroed)
*first_not_zeroed = grp;
-
- ext4_free_blocks_count_set(sbi->s_es,
- EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
- sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
return 1;
}
@@ -2809,10 +2803,11 @@ static void print_daily_error_info(unsigned long arg)
es = sbi->s_es;
if (es->s_error_count)
- ext4_msg(sb, KERN_NOTICE, "error count: %u",
+ /* fsck newer than v1.41.13 is needed to clean this condition. */
+ ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
@@ -2826,7 +2821,7 @@ static void print_daily_error_info(unsigned long arg)
printk("\n");
}
if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
@@ -3880,38 +3875,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
-
- /*
- * set up enough so that it can read an inode,
- * and create new inode for buddy allocator
- */
- sbi->s_gdb_count = db_count;
- if (!test_opt(sb, NOLOAD) &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
- sb->s_op = &ext4_sops;
- else
- sb->s_op = &ext4_nojournal_sops;
-
- ext4_ext_init(sb);
- err = ext4_mb_init(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
- err);
- goto failed_mount2;
- }
-
if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
- goto failed_mount2a;
+ goto failed_mount2;
}
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
- if (!ext4_fill_flex_info(sb)) {
- ext4_msg(sb, KERN_ERR,
- "unable to initialize "
- "flex_bg meta info!");
- goto failed_mount2a;
- }
+ sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
@@ -3922,23 +3891,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Register extent status tree shrinker */
ext4_es_register_shrinker(sbi);
- err = percpu_counter_init(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- if (!err) {
- err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
- }
- if (err) {
+ if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount3;
}
@@ -3946,6 +3899,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_extent_max_zeroout_kb = 32;
+ /*
+ * set up enough so that it can read an inode
+ */
+ if (!test_opt(sb, NOLOAD) &&
+ EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+ sb->s_op = &ext4_sops;
+ else
+ sb->s_op = &ext4_nojournal_sops;
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
#ifdef CONFIG_QUOTA
@@ -4034,18 +3995,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
- /*
- * The journal may have updated the bg summary counts, so we
- * need to update the global counters.
- */
- percpu_counter_set(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- percpu_counter_set(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- percpu_counter_set(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
-
no_journal:
if (ext4_mballoc_ready) {
sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
@@ -4135,16 +4084,51 @@ no_journal:
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
"reserved pool", ext4_calculate_resv_clusters(sb));
- goto failed_mount5;
+ goto failed_mount4a;
}
err = ext4_setup_system_zone(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize system "
"zone (%d)", err);
+ goto failed_mount4a;
+ }
+
+ ext4_ext_init(sb);
+ err = ext4_mb_init(sb);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
+ err);
goto failed_mount5;
}
+ block = ext4_count_free_clusters(sb);
+ ext4_free_blocks_count_set(sbi->s_es,
+ EXT4_C2B(sbi, block));
+ err = percpu_counter_init(&sbi->s_freeclusters_counter, block);
+ if (!err) {
+ unsigned long freei = ext4_count_free_inodes(sb);
+ sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
+ err = percpu_counter_init(&sbi->s_freeinodes_counter, freei);
+ }
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb));
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "insufficient memory");
+ goto failed_mount6;
+ }
+
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+ if (!ext4_fill_flex_info(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "unable to initialize "
+ "flex_bg meta info!");
+ goto failed_mount6;
+ }
+
err = ext4_register_li_request(sb, first_not_zeroed);
if (err)
goto failed_mount6;
@@ -4218,8 +4202,17 @@ failed_mount8:
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
- ext4_release_system_zone(sb);
+ ext4_mb_release(sb);
+ if (sbi->s_flex_groups)
+ ext4_kvfree(sbi->s_flex_groups);
+ percpu_counter_destroy(&sbi->s_freeclusters_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
failed_mount5:
+ ext4_ext_release(sb);
+ ext4_release_system_zone(sb);
+failed_mount4a:
dput(sb->s_root);
sb->s_root = NULL;
failed_mount4:
@@ -4234,23 +4227,14 @@ failed_mount_wq:
failed_mount3:
ext4_es_unregister_shrinker(sbi);
del_timer_sync(&sbi->s_err_report);
- if (sbi->s_flex_groups)
- ext4_kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
percpu_counter_destroy(&sbi->s_extent_cache_cnt);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
-failed_mount2a:
- ext4_mb_release(sb);
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
ext4_kvfree(sbi->s_group_desc);
failed_mount:
- ext4_ext_release(sb);
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
if (sbi->s_proc) {
@@ -4560,11 +4544,13 @@ static int ext4_commit_super(struct super_block *sb, int sync)
else
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- ext4_free_blocks_count_set(es,
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
+ ext4_free_blocks_count_set(es,
EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeclusters_counter)));
- es->s_free_inodes_count =
- cpu_to_le32(percpu_counter_sum_positive(
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+ es->s_free_inodes_count =
+ cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index dbe2141d10ad..83b9b5a8d112 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
size_t size = 0;
int error;
- if (acl) {
- error = posix_acl_valid(acl);
- if (error < 0)
- return error;
- }
-
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c1d370..6aeed5bada52 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
#include "segment.h"
#include <trace/events/f2fs.h>
-static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;
/*
@@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
};
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct ino_entry *e;
+retry:
+ spin_lock(&sbi->ino_lock[type]);
+
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (!e) {
+ e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
+ if (!e) {
+ spin_unlock(&sbi->ino_lock[type]);
+ goto retry;
+ }
+ if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
+ goto retry;
+ }
+ memset(e, 0, sizeof(struct ino_entry));
+ e->ino = ino;
+
+ list_add_tail(&e->list, &sbi->ino_list[type]);
+ }
+ spin_unlock(&sbi->ino_lock[type]);
+}
+
+static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct ino_entry *e;
+
+ spin_lock(&sbi->ino_lock[type]);
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (e) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[type], ino);
+ if (type == ORPHAN_INO)
+ sbi->n_orphans--;
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
+ return;
+ }
+ spin_unlock(&sbi->ino_lock[type]);
+}
+
+void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* add new dirty ino entry into list */
+ __add_ino_entry(sbi, ino, type);
+}
+
+void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* remove dirty ino entry from list */
+ __remove_ino_entry(sbi, ino, type);
+}
+
+/* mode should be APPEND_INO or UPDATE_INO */
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+{
+ struct ino_entry *e;
+ spin_lock(&sbi->ino_lock[mode]);
+ e = radix_tree_lookup(&sbi->ino_root[mode], ino);
+ spin_unlock(&sbi->ino_lock[mode]);
+ return e ? true : false;
+}
+
+static void release_dirty_inode(struct f2fs_sb_info *sbi)
+{
+ struct ino_entry *e, *tmp;
+ int i;
+
+ for (i = APPEND_INO; i <= UPDATE_INO; i++) {
+ spin_lock(&sbi->ino_lock[i]);
+ list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[i], e->ino);
+ kmem_cache_free(ino_entry_slab, e);
+ }
+ spin_unlock(&sbi->ino_lock[i]);
+ }
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
int err = 0;
- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
else
sbi->n_orphans++;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *new, *orphan;
-
- new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
- new->ino = ino;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, new);
- return;
- }
-
- if (orphan->ino > ino)
- break;
- }
-
- /* add new orphan entry into list which is sorted by inode number */
- list_add_tail(&new->list, &orphan->list);
- spin_unlock(&sbi->orphan_inode_lock);
+ /* add new orphan ino entry into list */
+ __add_ino_entry(sbi, ino, ORPHAN_INO);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *orphan;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- list_del(&orphan->list);
- f2fs_bug_on(sbi->n_orphans == 0);
- sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, orphan);
- return;
- }
- }
- spin_unlock(&sbi->orphan_inode_lock);
+ /* remove orphan entry from orphan list */
+ __remove_ino_entry(sbi, ino, ORPHAN_INO);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
struct page *page = NULL;
- struct orphan_inode_entry *orphan = NULL;
+ struct ino_entry *orphan = NULL;
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
index = 1;
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
+ head = &sbi->ino_list[ORPHAN_INO];
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
f2fs_put_page(page, 1);
}
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@ retry_flush_dents:
* until finishing nat/sit flush.
*/
retry_flush_nodes:
- mutex_lock(&sbi->node_write);
+ down_write(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
- mutex_unlock(&sbi->node_write);
+ up_write(&sbi->node_write);
sync_node_pages(sbi, 0, &wbc);
goto retry_flush_nodes;
}
@@ -726,7 +774,7 @@ retry_flush_nodes:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
- mutex_unlock(&sbi->node_write);
+ up_write(&sbi->node_write);
f2fs_unlock_all(sbi);
}
@@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
nid_t last_nid = 0;
block_t start_blk;
struct page *cp_page;
@@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
- discard_next_dnode(sbi);
+ discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
- if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
+ if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
clear_prefree_segments(sbi);
+ release_dirty_inode(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
}
@@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
-void init_orphan_info(struct f2fs_sb_info *sbi)
+void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
- spin_lock_init(&sbi->orphan_inode_lock);
- INIT_LIST_HEAD(&sbi->orphan_inode_list);
- sbi->n_orphans = 0;
+ int i;
+
+ for (i = 0; i < MAX_INO_ENTRY; i++) {
+ INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
+ spin_lock_init(&sbi->ino_lock[i]);
+ INIT_LIST_HEAD(&sbi->ino_list[i]);
+ }
+
/*
* considering 512 blocks in a segment 8 blocks are needed for cp
* and log segment summaries. Remaining blocks are used to keep
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
+ sbi->n_orphans = 0;
sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
* F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
{
- orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
- sizeof(struct orphan_inode_entry));
- if (!orphan_entry_slab)
+ ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
+ sizeof(struct ino_entry));
+ if (!ino_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry));
if (!inode_entry_slab) {
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
return -ENOMEM;
}
return 0;
@@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)
void destroy_checkpoint_caches(void)
{
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(inode_entry_slab);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0924521306b4..03313099c51c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
- io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ if (test_opt(sbi, NOBARRIER))
+ io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
+ else
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
up_write(&io->io_rwsem);
@@ -608,8 +611,8 @@ static int __allocate_data_block(struct dnode_of_data *dn)
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
-static int get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int __get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create, bool fiemap)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blkbits = inode->i_sb->s_blocksize_bits;
@@ -626,8 +629,10 @@ static int get_data_block(struct inode *inode, sector_t iblock,
if (check_extent_cache(inode, pgofs, bh_result))
goto out;
- if (create)
+ if (create) {
+ f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
+ }
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -637,7 +642,7 @@ static int get_data_block(struct inode *inode, sector_t iblock,
err = 0;
goto unlock_out;
}
- if (dn.data_blkaddr == NEW_ADDR)
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
goto put_out;
if (dn.data_blkaddr != NULL_ADDR) {
@@ -671,7 +676,7 @@ get_next:
err = 0;
goto unlock_out;
}
- if (dn.data_blkaddr == NEW_ADDR)
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
goto put_out;
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -708,10 +713,23 @@ out:
return err;
}
+static int get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, false);
+}
+
+static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, true);
+}
+
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo, start, len, get_data_block);
+ return generic_block_fiemap(inode, fieinfo,
+ start, len, get_data_block_fiemap);
}
static int f2fs_read_data_page(struct file *file, struct page *page)
@@ -771,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
!is_cold_data(page) &&
need_inplace_update(inode))) {
rewrite_data_page(page, old_blkaddr, fio);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
} else {
write_data_page(page, &dn, &new_blkaddr, fio);
update_extent_cache(new_blkaddr, &dn);
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
}
out_writepage:
f2fs_put_dnode(&dn);
@@ -901,6 +921,16 @@ skip_write:
return 0;
}
+static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, inode->i_size);
+ truncate_blocks(inode, inode->i_size);
+ }
+}
+
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -918,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
repeat:
err = f2fs_convert_inline_data(inode, pos + len);
if (err)
- return err;
+ goto fail;
page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ err = -ENOMEM;
+ goto fail;
+ }
/* to avoid latency during memory pressure */
unlock_page(page);
@@ -936,10 +968,9 @@ repeat:
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_reserve_block(&dn, index);
f2fs_unlock_op(sbi);
-
if (err) {
f2fs_put_page(page, 0);
- return err;
+ goto fail;
}
inline_data:
lock_page(page);
@@ -969,19 +1000,20 @@ inline_data:
err = f2fs_read_inline_data(inode, page);
if (err) {
page_cache_release(page);
- return err;
+ goto fail;
}
} else {
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
if (err)
- return err;
+ goto fail;
}
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
- return -EIO;
+ err = -EIO;
+ goto fail;
}
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
@@ -992,6 +1024,9 @@ out:
SetPageUptodate(page);
clear_cold_data(page);
return 0;
+fail:
+ f2fs_write_failed(mapping, pos + len);
+ return err;
}
static int f2fs_write_end(struct file *file,
@@ -1003,7 +1038,6 @@ static int f2fs_write_end(struct file *file,
trace_f2fs_write_end(inode, pos, len, copied);
- SetPageUptodate(page);
set_page_dirty(page);
if (pos + copied > i_size_read(inode)) {
@@ -1037,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+ int err;
/* Let buffer I/O handle the inline data case. */
if (f2fs_has_inline_data(inode))
@@ -1049,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
/* clear fsync mark to recover these blocks */
fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
- return blockdev_direct_IO(rw, iocb, inode, iter, offset,
- get_data_block);
+ trace_f2fs_direct_IO_enter(inode, offset, count, rw);
+
+ err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
+ if (err < 0 && (rw & WRITE))
+ f2fs_write_failed(mapping, offset + count);
+
+ trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
+
+ return err;
}
static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12cf5873..a441ba33be11 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@ get_cache:
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
+ si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
}
@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
- goto bail;
+ return;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
- if (!file)
- goto free_debugfs_dir;
-
- return;
-
-free_debugfs_dir:
- debugfs_remove(f2fs_debugfs_root);
-
-bail:
- f2fs_debugfs_root = NULL;
- return;
+ if (!file) {
+ debugfs_remove(f2fs_debugfs_root);
+ f2fs_debugfs_root = NULL;
+ }
}
void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 966acb039e3b..bcf893c3d903 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,
return bidx;
}
-static bool early_match_name(const char *name, size_t namelen,
- f2fs_hash_t namehash, struct f2fs_dir_entry *de)
+static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
+ struct f2fs_dir_entry *de)
{
if (le16_to_cpu(de->name_len) != namelen)
return false;
@@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- const char *name, size_t namelen, int *max_slots,
+ struct qstr *name, int *max_slots,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
continue;
}
de = &dentry_blk->dentry[bit_pos];
- if (early_match_name(name, namelen, namehash, de)) {
+ if (early_match_name(name->len, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
- name, namelen)) {
+ name->name,
+ name->len)) {
*res_page = dentry_page;
goto found;
}
@@ -120,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
*max_slots = max_len;
max_len = 0;
}
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs are occurred.
+ */
+ f2fs_bug_on(!de->name_len);
+
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
@@ -132,10 +140,10 @@ found:
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
- unsigned int level, const char *name, size_t namelen,
+ unsigned int level, struct qstr *name,
f2fs_hash_t namehash, struct page **res_page)
{
- int s = GET_DENTRY_SLOTS(namelen);
+ int s = GET_DENTRY_SLOTS(name->len);
unsigned int nbucket, nblock;
unsigned int bidx, end_block;
struct page *dentry_page;
@@ -160,8 +168,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
continue;
}
- de = find_in_block(dentry_page, name, namelen,
- &max_slots, namehash, res_page);
+ de = find_in_block(dentry_page, name, &max_slots,
+ namehash, res_page);
if (de)
break;
@@ -187,8 +195,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct qstr *child, struct page **res_page)
{
- const char *name = child->name;
- size_t namelen = child->len;
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
f2fs_hash_t name_hash;
@@ -200,12 +206,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
*res_page = NULL;
- name_hash = f2fs_dentry_hash(name, namelen);
+ name_hash = f2fs_dentry_hash(child);
max_depth = F2FS_I(dir)->i_current_depth;
for (level = 0; level < max_depth; level++) {
- de = find_in_level(dir, level, name,
- namelen, name_hash, res_page);
+ de = find_in_level(dir, level, child, name_hash, res_page);
if (de)
break;
}
@@ -298,14 +303,13 @@ static int make_empty_dir(struct inode *inode,
struct page *dentry_page;
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dir_entry *de;
- void *kaddr;
dentry_page = get_new_data_page(inode, page, 0, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
- kaddr = kmap_atomic(dentry_page);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+ dentry_blk = kmap_atomic(dentry_page);
de = &dentry_blk->dentry[0];
de->name_len = cpu_to_le16(1);
@@ -323,7 +327,7 @@ static int make_empty_dir(struct inode *inode,
test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
- kunmap_atomic(kaddr);
+ kunmap_atomic(dentry_blk);
set_page_dirty(dentry_page);
f2fs_put_page(dentry_page, 1);
@@ -333,11 +337,12 @@ static int make_empty_dir(struct inode *inode,
static struct page *init_inode_metadata(struct inode *inode,
struct inode *dir, const struct qstr *name)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct page *page;
int err;
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
- page = new_inode_page(inode, name);
+ page = new_inode_page(inode);
if (IS_ERR(page))
return page;
@@ -362,7 +367,8 @@ static struct page *init_inode_metadata(struct inode *inode,
set_cold_node(inode, page);
}
- init_dent_inode(name, page);
+ if (name)
+ init_dent_inode(name, page);
/*
* This file should be checkpointed during fsync.
@@ -370,17 +376,23 @@ static struct page *init_inode_metadata(struct inode *inode,
*/
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
file_lost_pino(inode);
+ /*
+ * If link the tmpfile to alias through linkat path,
+ * we should remove this inode from orphan list.
+ */
+ if (inode->i_nlink == 0)
+ remove_orphan_inode(sbi, inode->i_ino);
inc_nlink(inode);
}
return page;
put_error:
f2fs_put_page(page, 1);
+error:
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
truncate_blocks(inode, 0);
remove_dirty_dir_inode(inode);
-error:
remove_inode_page(inode);
return ERR_PTR(err);
}
@@ -453,7 +465,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
int err = 0;
int i;
- dentry_hash = f2fs_dentry_hash(name->name, name->len);
+ dentry_hash = f2fs_dentry_hash(name);
level = 0;
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@@ -529,6 +541,27 @@ fail:
return err;
}
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
+{
+ struct page *page;
+ int err = 0;
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, NULL);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+ /* we don't need to mark_inode_dirty now */
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+ return err;
+}
+
/*
* It only removes the dentry from the dentry page,corresponding name
* entry in name page does not need to be touched during deletion.
@@ -541,14 +574,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
- void *kaddr = page_address(page);
int i;
lock_page(page);
f2fs_wait_on_page_writeback(page, DATA);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
- bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
+ dentry_blk = page_address(page);
+ bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
@@ -603,7 +635,6 @@ bool f2fs_empty_dir(struct inode *dir)
unsigned long nblock = dir_blocks(dir);
for (bidx = 0; bidx < nblock; bidx++) {
- void *kaddr;
dentry_page = get_lock_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
@@ -612,8 +643,8 @@ bool f2fs_empty_dir(struct inode *dir)
return false;
}
- kaddr = kmap_atomic(dentry_page);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+ dentry_blk = kmap_atomic(dentry_page);
if (bidx == 0)
bit_pos = 2;
else
@@ -621,7 +652,7 @@ bool f2fs_empty_dir(struct inode *dir)
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_pos);
- kunmap_atomic(kaddr);
+ kunmap_atomic(dentry_blk);
f2fs_put_page(dentry_page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e51c732b0dd9..4dab5338a97a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,6 +41,7 @@
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
+#define F2FS_MOUNT_NOBARRIER 0x00000400
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -99,8 +100,15 @@ enum {
META_SSA
};
-/* for the list of orphan inodes */
-struct orphan_inode_entry {
+/* for the list of ino */
+enum {
+ ORPHAN_INO, /* for orphan ino list */
+ APPEND_INO, /* for append ino list */
+ UPDATE_INO, /* for update ino list */
+ MAX_INO_ENTRY, /* max. list */
+};
+
+struct ino_entry {
struct list_head list; /* list head */
nid_t ino; /* inode number */
};
@@ -256,6 +264,8 @@ struct f2fs_nm_info {
unsigned int nat_cnt; /* the # of cached nat entries */
struct list_head nat_entries; /* cached nat entry list (clean) */
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
+ struct list_head nat_entry_set; /* nat entry set list */
+ unsigned int dirty_nat_cnt; /* total num of nat entries in set */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -342,9 +352,6 @@ struct f2fs_sm_info {
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
- struct list_head wblist_head; /* list of under-writeback pages */
- spinlock_t wblist_lock; /* lock for checkpoint */
-
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
@@ -445,14 +452,17 @@ struct f2fs_sb_info {
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
- struct mutex node_write; /* locking node writes */
+ struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
bool por_doing; /* recovery is doing or not */
wait_queue_head_t cp_wait;
- /* for orphan inode management */
- struct list_head orphan_inode_list; /* orphan inode list */
- spinlock_t orphan_inode_lock; /* for orphan inode list */
+ /* for inode management */
+ struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
+ spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
+ struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
+
+ /* for orphan inode, use 0'th array */
unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */
@@ -644,7 +654,8 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
*/
static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
- WARN_ON((nid >= NM_I(sbi)->max_nid));
+ if (unlikely(nid < F2FS_ROOT_INO(sbi)))
+ return -EINVAL;
if (unlikely(nid >= NM_I(sbi)->max_nid))
return -EINVAL;
return 0;
@@ -770,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
if (flag == NAT_BITMAP)
return &ckpt->sit_nat_version_bitmap;
else
- return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+ return (unsigned char *)ckpt + F2FS_BLKSIZE;
} else {
offset = (flag == NAT_BITMAP) ?
le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
@@ -985,11 +996,15 @@ enum {
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* used fo ipu for fdatasync */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- set_bit(flag, &fi->flags);
+ if (!test_bit(flag, &fi->flags))
+ set_bit(flag, &fi->flags);
}
static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -999,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- clear_bit(flag, &fi->flags);
+ if (test_bit(flag, &fi->flags))
+ clear_bit(flag, &fi->flags);
}
static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
@@ -1138,6 +1154,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
int update_dent_inode(struct inode *, const struct qstr *);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+int f2fs_do_tmpfile(struct inode *, struct inode *);
int f2fs_make_empty(struct inode *, struct inode *);
bool f2fs_empty_dir(struct inode *);
@@ -1157,7 +1174,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
/*
* node.c
@@ -1175,7 +1192,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *);
int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
void remove_inode_page(struct inode *);
-struct page *new_inode_page(struct inode *, const struct qstr *);
+struct page *new_inode_page(struct inode *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t);
struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -1187,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
void recover_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, struct node_info *, block_t);
+void recover_inline_xattr(struct inode *, struct page *);
bool recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
@@ -1208,7 +1226,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
-void discard_next_dnode(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1242,6 +1260,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
@@ -1253,7 +1274,7 @@ void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool);
-void init_orphan_info(struct f2fs_sb_info *);
+void init_ino_entry_info(struct f2fs_sb_info *);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
@@ -1297,7 +1318,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);
struct f2fs_stat_info {
struct list_head stat_list;
struct f2fs_sb_info *sbi;
- struct mutex stat_lock;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones;
int hit_ext, total_ext;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c58e33075719..208f1a9bd569 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -127,12 +127,30 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return 0;
trace_f2fs_sync_file_enter(inode);
+
+ /* if fdatasync is triggered, let's do in-place-update */
+ if (datasync)
+ set_inode_flag(fi, FI_NEED_IPU);
+
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (datasync)
+ clear_inode_flag(fi, FI_NEED_IPU);
if (ret) {
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
+ /*
+ * if there is no written data, don't waste time to write recovery info.
+ */
+ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
+ !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
+ if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
+ exist_written_data(sbi, inode->i_ino, UPDATE_INO))
+ goto flush_out;
+ goto out;
+ }
+
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
@@ -188,6 +206,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
if (ret)
goto out;
+
+ /* once recovery info is written, don't need to tack this */
+ remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ clear_inode_flag(fi, FI_APPEND_WRITE);
+flush_out:
+ remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+ clear_inode_flag(fi, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
@@ -206,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
/* find first dirty page index */
pagevec_init(&pvec, 0);
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
- pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
+ PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
pagevec_release(&pvec);
return pgofs;
}
@@ -272,8 +298,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
}
}
- end_offset = IS_INODE(dn.node_page) ?
- ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
/* find data/hole in dnode block */
for (; dn.ofs_in_node < end_offset;
@@ -380,13 +405,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
return;
lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping)) {
- f2fs_put_page(page, 1);
- return;
- }
+ if (unlikely(!PageUptodate(page) ||
+ page->mapping != inode->i_mapping))
+ goto out;
+
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
+
+out:
f2fs_put_page(page, 1);
}
@@ -645,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
loff_t off_start, off_end;
int ret = 0;
+ f2fs_balance_fs(sbi);
+
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
@@ -659,16 +688,19 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
off_start = offset & (PAGE_CACHE_SIZE - 1);
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+ f2fs_lock_op(sbi);
+
for (index = pg_start; index <= pg_end; index++) {
struct dnode_of_data dn;
- f2fs_lock_op(sbi);
+ if (index == pg_end && !off_end)
+ goto noalloc;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_reserve_block(&dn, index);
- f2fs_unlock_op(sbi);
if (ret)
break;
-
+noalloc:
if (pg_start == pg_end)
new_size = offset + len;
else if (index == pg_start && off_start)
@@ -683,8 +715,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
i_size_read(inode) < new_size) {
i_size_write(inode, new_size);
mark_inode_dirty(inode);
- f2fs_write_inode(inode, NULL);
+ update_inode_page(inode);
}
+ f2fs_unlock_op(sbi);
return ret;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b90dbe55403a..d7947d90ccc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int hint = 0;
unsigned int secno;
/*
@@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
* selected by background GC before.
* Those segments guarantee they have small valid blocks.
*/
-next:
- secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
- if (secno < TOTAL_SECS(sbi)) {
+ for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
- goto next;
+ continue;
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
}
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d269b53b..948d17bf7281 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
*buf++ = pad;
}
-f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
const char *p;
__u32 in[8], buf[4];
+ const char *name = name_info->name;
+ size_t len = name_info->len;
if ((len <= 2) && (name[0] == '.') &&
(name[1] == '.' || name[1] == '\0'))
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1bba5228c197..5beeccef9ae1 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode,
stat_inc_inline_inode(inode);
}
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index adc622c6bdce..2c39999f3868 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -78,6 +78,7 @@ static int do_read_inode(struct inode *inode)
if (check_nid_range(sbi, inode->i_ino)) {
f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
(unsigned long) inode->i_ino);
+ WARN_ON(1);
return -EINVAL;
}
@@ -266,13 +267,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
- goto no_delete;
+ goto out_clear;
f2fs_bug_on(get_dirty_dents(inode));
remove_dirty_dir_inode(inode);
@@ -294,6 +296,13 @@ void f2fs_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
no_delete:
- clear_inode(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
+ if (xnid)
+ invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
+ if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+out_clear:
+ clear_inode(inode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 9138c32aa698..27b03776ffd2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/ctype.h>
+#include <linux/dcache.h>
#include "f2fs.h"
#include "node.h"
@@ -22,14 +23,13 @@
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
nid_t ino;
struct inode *inode;
bool nid_free = false;
int err;
- inode = new_inode(sb);
+ inode = new_inode(dir->i_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -102,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
nid_t ino = 0;
int err;
@@ -146,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int err;
f2fs_balance_fs(sbi);
@@ -207,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode = dentry->d_inode;
struct f2fs_dir_entry *de;
struct page *page;
@@ -242,8 +239,7 @@ fail:
static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
size_t symlen = strlen(symname) + 1;
int err;
@@ -330,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
int err = 0;
@@ -369,8 +364,7 @@ out:
static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
- struct super_block *sb = old_dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct page *old_dir_page;
@@ -393,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_old;
}
- f2fs_lock_op(sbi);
-
if (new_inode) {
err = -ENOTEMPTY;
@@ -407,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!new_entry)
goto out_dir;
+ f2fs_lock_op(sbi);
+
err = acquire_orphan_inode(sbi);
if (err)
goto put_out_dir;
@@ -417,9 +411,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
- down_write(&F2FS_I(old_inode)->i_sem);
- F2FS_I(old_inode)->i_pino = new_dir->i_ino;
- up_write(&F2FS_I(old_inode)->i_sem);
new_inode->i_ctime = CURRENT_TIME;
down_write(&F2FS_I(new_inode)->i_sem);
@@ -438,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
+ f2fs_lock_op(sbi);
+
err = f2fs_add_link(new_dentry, old_inode);
- if (err)
+ if (err) {
+ f2fs_unlock_op(sbi);
goto out_dir;
+ }
if (old_dir_entry) {
inc_nlink(new_dir);
@@ -448,6 +443,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
+ down_write(&F2FS_I(old_inode)->i_sem);
+ file_lost_pino(old_inode);
+ up_write(&F2FS_I(old_inode)->i_sem);
+
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
@@ -457,9 +456,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
- down_write(&F2FS_I(old_inode)->i_sem);
- F2FS_I(old_inode)->i_pino = new_dir->i_ino;
- up_write(&F2FS_I(old_inode)->i_sem);
update_inode_page(old_inode);
} else {
kunmap(old_dir_page);
@@ -474,13 +470,159 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
put_out_dir:
- f2fs_put_page(new_page, 1);
+ f2fs_unlock_op(sbi);
+ kunmap(new_page);
+ f2fs_put_page(new_page, 0);
out_dir:
if (old_dir_entry) {
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
+out_old:
+ kunmap(old_page);
+ f2fs_put_page(old_page, 0);
+out:
+ return err;
+}
+
+static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct super_block *sb = old_dir->i_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct inode *old_inode = old_dentry->d_inode;
+ struct inode *new_inode = new_dentry->d_inode;
+ struct page *old_dir_page, *new_dir_page;
+ struct page *old_page, *new_page;
+ struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
+ struct f2fs_dir_entry *old_entry, *new_entry;
+ int old_nlink = 0, new_nlink = 0;
+ int err = -ENOENT;
+
+ f2fs_balance_fs(sbi);
+
+ old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+ if (!old_entry)
+ goto out;
+
+ new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
+ if (!new_entry)
+ goto out_old;
+
+ /* prepare for updating ".." directory entry info later */
+ if (old_dir != new_dir) {
+ if (S_ISDIR(old_inode->i_mode)) {
+ err = -EIO;
+ old_dir_entry = f2fs_parent_dir(old_inode,
+ &old_dir_page);
+ if (!old_dir_entry)
+ goto out_new;
+ }
+
+ if (S_ISDIR(new_inode->i_mode)) {
+ err = -EIO;
+ new_dir_entry = f2fs_parent_dir(new_inode,
+ &new_dir_page);
+ if (!new_dir_entry)
+ goto out_old_dir;
+ }
+ }
+
+ /*
+ * If cross rename between file and directory those are not
+ * in the same directory, we will inc nlink of file's parent
+ * later, so we should check upper boundary of its nlink.
+ */
+ if ((!old_dir_entry || !new_dir_entry) &&
+ old_dir_entry != new_dir_entry) {
+ old_nlink = old_dir_entry ? -1 : 1;
+ new_nlink = -old_nlink;
+ err = -EMLINK;
+ if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
+ (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
+ goto out_new_dir;
+ }
+
+ f2fs_lock_op(sbi);
+
+ err = update_dent_inode(old_inode, &new_dentry->d_name);
+ if (err)
+ goto out_unlock;
+
+ err = update_dent_inode(new_inode, &old_dentry->d_name);
+ if (err)
+ goto out_undo;
+
+ /* update ".." directory entry info of old dentry */
+ if (old_dir_entry)
+ f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
+
+ /* update ".." directory entry info of new dentry */
+ if (new_dir_entry)
+ f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir);
+
+ /* update directory entry info of old dir inode */
+ f2fs_set_link(old_dir, old_entry, old_page, new_inode);
+
+ down_write(&F2FS_I(old_inode)->i_sem);
+ file_lost_pino(old_inode);
+ up_write(&F2FS_I(old_inode)->i_sem);
+
+ update_inode_page(old_inode);
+
+ old_dir->i_ctime = CURRENT_TIME;
+ if (old_nlink) {
+ down_write(&F2FS_I(old_dir)->i_sem);
+ if (old_nlink < 0)
+ drop_nlink(old_dir);
+ else
+ inc_nlink(old_dir);
+ up_write(&F2FS_I(old_dir)->i_sem);
+ }
+ mark_inode_dirty(old_dir);
+ update_inode_page(old_dir);
+
+ /* update directory entry info of new dir inode */
+ f2fs_set_link(new_dir, new_entry, new_page, old_inode);
+
+ down_write(&F2FS_I(new_inode)->i_sem);
+ file_lost_pino(new_inode);
+ up_write(&F2FS_I(new_inode)->i_sem);
+
+ update_inode_page(new_inode);
+
+ new_dir->i_ctime = CURRENT_TIME;
+ if (new_nlink) {
+ down_write(&F2FS_I(new_dir)->i_sem);
+ if (new_nlink < 0)
+ drop_nlink(new_dir);
+ else
+ inc_nlink(new_dir);
+ up_write(&F2FS_I(new_dir)->i_sem);
+ }
+ mark_inode_dirty(new_dir);
+ update_inode_page(new_dir);
+
f2fs_unlock_op(sbi);
+ return 0;
+out_undo:
+ /* Still we may fail to recover name info of f2fs_inode here */
+ update_dent_inode(old_inode, &old_dentry->d_name);
+out_unlock:
+ f2fs_unlock_op(sbi);
+out_new_dir:
+ if (new_dir_entry) {
+ kunmap(new_dir_page);
+ f2fs_put_page(new_dir_page, 0);
+ }
+out_old_dir:
+ if (old_dir_entry) {
+ kunmap(old_dir_page);
+ f2fs_put_page(old_dir_page, 0);
+ }
+out_new:
+ kunmap(new_page);
+ f2fs_put_page(new_page, 0);
out_old:
kunmap(old_page);
f2fs_put_page(old_page, 0);
@@ -488,6 +630,71 @@ out:
return err;
}
+static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ return -EINVAL;
+
+ if (flags & RENAME_EXCHANGE) {
+ return f2fs_cross_rename(old_dir, old_dentry,
+ new_dir, new_dentry);
+ }
+ /*
+ * VFS has already handled the new dentry existence case,
+ * here, we just deal with "RENAME_NOREPLACE" as regular rename.
+ */
+ return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct inode *inode;
+ int err;
+
+ inode = f2fs_new_inode(dir, mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_op = &f2fs_file_inode_operations;
+ inode->i_fop = &f2fs_file_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+
+ f2fs_lock_op(sbi);
+ err = acquire_orphan_inode(sbi);
+ if (err)
+ goto out;
+
+ err = f2fs_do_tmpfile(inode, dir);
+ if (err)
+ goto release_out;
+
+ /*
+ * add this non-linked tmpfile to orphan list, in this way we could
+ * remove all unused data of tmpfile after abnormal power-off.
+ */
+ add_orphan_inode(sbi, inode->i_ino);
+ f2fs_unlock_op(sbi);
+
+ alloc_nid_done(sbi, inode->i_ino);
+ d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
+ return 0;
+
+release_out:
+ release_orphan_inode(sbi);
+out:
+ f2fs_unlock_op(sbi);
+ clear_nlink(inode);
+ unlock_new_inode(inode);
+ make_bad_inode(inode);
+ iput(inode);
+ alloc_nid_failed(sbi, inode->i_ino);
+ return err;
+}
+
const struct inode_operations f2fs_dir_inode_operations = {
.create = f2fs_create,
.lookup = f2fs_lookup,
@@ -498,6 +705,8 @@ const struct inode_operations f2fs_dir_inode_operations = {
.rmdir = f2fs_rmdir,
.mknod = f2fs_mknod,
.rename = f2fs_rename,
+ .rename2 = f2fs_rename2,
+ .tmpfile = f2fs_tmpfile,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9dfb9a042fd2..d3d90d284631 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
+static struct kmem_cache *nat_entry_set_slab;
bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
@@ -42,6 +43,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
}
@@ -88,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
/* get current nat block page with lock */
src_page = get_meta_page(sbi, src_off);
-
- /* Dirty src_page means that it is already the new target NAT page. */
- if (PageDirty(src_page))
- return src_page;
-
dst_page = grab_meta_page(sbi, dst_off);
+ f2fs_bug_on(PageDirty(src_page));
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
@@ -843,7 +842,7 @@ void remove_inode_page(struct inode *inode)
truncate_node(&dn);
}
-struct page *new_inode_page(struct inode *inode, const struct qstr *name)
+struct page *new_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
@@ -1232,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,
if (wbc->for_reclaim)
goto redirty_out;
- mutex_lock(&sbi->node_write);
+ down_read(&sbi->node_write);
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
- mutex_unlock(&sbi->node_write);
+ up_read(&sbi->node_write);
unlock_page(page);
return 0;
@@ -1550,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
clear_node_page_dirty(page);
}
-static void recover_inline_xattr(struct inode *inode, struct page *page)
+void recover_inline_xattr(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
void *src_addr, *dst_addr;
@@ -1589,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
nid_t new_xnid = nid_of_node(page);
struct node_info ni;
- recover_inline_xattr(inode, page);
-
if (!f2fs_has_xattr_block(ofs_of_node(page)))
return false;
@@ -1742,7 +1739,90 @@ skip:
return err;
}
-static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
+static struct nat_entry_set *grab_nat_entry_set(void)
+{
+ struct nat_entry_set *nes =
+ f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
+
+ nes->entry_cnt = 0;
+ INIT_LIST_HEAD(&nes->set_list);
+ INIT_LIST_HEAD(&nes->entry_list);
+ return nes;
+}
+
+static void release_nat_entry_set(struct nat_entry_set *nes,
+ struct f2fs_nm_info *nm_i)
+{
+ f2fs_bug_on(!list_empty(&nes->entry_list));
+
+ nm_i->dirty_nat_cnt -= nes->entry_cnt;
+ list_del(&nes->set_list);
+ kmem_cache_free(nat_entry_set_slab, nes);
+}
+
+static void adjust_nat_entry_set(struct nat_entry_set *nes,
+ struct list_head *head)
+{
+ struct nat_entry_set *next = nes;
+
+ if (list_is_last(&nes->set_list, head))
+ return;
+
+ list_for_each_entry_continue(next, head, set_list)
+ if (nes->entry_cnt <= next->entry_cnt)
+ break;
+
+ list_move_tail(&nes->set_list, &next->set_list);
+}
+
+static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
+{
+ struct nat_entry_set *nes;
+ nid_t start_nid = START_NID(ne->ni.nid);
+
+ list_for_each_entry(nes, head, set_list) {
+ if (nes->start_nid == start_nid) {
+ list_move_tail(&ne->list, &nes->entry_list);
+ nes->entry_cnt++;
+ adjust_nat_entry_set(nes, head);
+ return;
+ }
+ }
+
+ nes = grab_nat_entry_set();
+
+ nes->start_nid = start_nid;
+ list_move_tail(&ne->list, &nes->entry_list);
+ nes->entry_cnt++;
+ list_add(&nes->set_list, head);
+}
+
+static void merge_nats_in_set(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct list_head *dirty_list = &nm_i->dirty_nat_entries;
+ struct list_head *set_list = &nm_i->nat_entry_set;
+ struct nat_entry *ne, *tmp;
+
+ write_lock(&nm_i->nat_tree_lock);
+ list_for_each_entry_safe(ne, tmp, dirty_list, list) {
+ if (nat_get_blkaddr(ne) == NEW_ADDR)
+ continue;
+ add_nat_entry(ne, set_list);
+ nm_i->dirty_nat_cnt++;
+ }
+ write_unlock(&nm_i->nat_tree_lock);
+}
+
+static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
+{
+ if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
+ return true;
+ else
+ return false;
+}
+
+static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1750,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
int i;
mutex_lock(&curseg->curseg_mutex);
-
- if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
- mutex_unlock(&curseg->curseg_mutex);
- return false;
- }
-
for (i = 0; i < nats_in_cursum(sum); i++) {
struct nat_entry *ne;
struct f2fs_nat_entry raw_ne;
@@ -1765,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
retry:
write_lock(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
- if (ne) {
- __set_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
- continue;
- }
+ if (ne)
+ goto found;
+
ne = grab_nat_entry(nm_i, nid);
if (!ne) {
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
node_info_from_raw_nat(&ne->ni, &raw_ne);
+found:
__set_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
- return true;
}
/*
@@ -1792,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- struct nat_entry *ne, *cur;
- struct page *page = NULL;
- struct f2fs_nat_block *nat_blk = NULL;
- nid_t start_nid = 0, end_nid = 0;
- bool flushed;
+ struct nat_entry_set *nes, *tmp;
+ struct list_head *head = &nm_i->nat_entry_set;
+ bool to_journal = true;
- flushed = flush_nats_in_journal(sbi);
-
- if (!flushed)
- mutex_lock(&curseg->curseg_mutex);
-
- /* 1) flush dirty nat caches */
- list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
- nid_t nid;
- struct f2fs_nat_entry raw_ne;
- int offset = -1;
-
- if (nat_get_blkaddr(ne) == NEW_ADDR)
- continue;
+ /* merge nat entries of dirty list to nat entry set temporarily */
+ merge_nats_in_set(sbi);
- nid = nat_get_nid(ne);
+ /*
+ * if there are no enough space in journal to store dirty nat
+ * entries, remove all entries from journal and merge them
+ * into nat entry set.
+ */
+ if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
+ remove_nats_in_journal(sbi);
- if (flushed)
- goto to_nat_page;
+ /*
+ * merge nat entries of dirty list to nat entry set temporarily
+ */
+ merge_nats_in_set(sbi);
+ }
- /* if there is room for nat enries in curseg->sumpage */
- offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
- if (offset >= 0) {
- raw_ne = nat_in_journal(sum, offset);
- goto flush_now;
- }
-to_nat_page:
- if (!page || (start_nid > nid || nid > end_nid)) {
- if (page) {
- f2fs_put_page(page, 1);
- page = NULL;
- }
- start_nid = START_NID(nid);
- end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
+ if (!nm_i->dirty_nat_cnt)
+ return;
- /*
- * get nat block with dirty flag, increased reference
- * count, mapped and lock
- */
+ /*
+ * there are two steps to flush nat entries:
+ * #1, flush nat entries to journal in current hot data summary block.
+ * #2, flush nat entries to nat page.
+ */
+ list_for_each_entry_safe(nes, tmp, head, set_list) {
+ struct f2fs_nat_block *nat_blk;
+ struct nat_entry *ne, *cur;
+ struct page *page;
+ nid_t start_nid = nes->start_nid;
+
+ if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
+ to_journal = false;
+
+ if (to_journal) {
+ mutex_lock(&curseg->curseg_mutex);
+ } else {
page = get_next_nat_page(sbi, start_nid);
nat_blk = page_address(page);
+ f2fs_bug_on(!nat_blk);
}
- f2fs_bug_on(!nat_blk);
- raw_ne = nat_blk->entries[nid - start_nid];
-flush_now:
- raw_nat_from_node_info(&raw_ne, &ne->ni);
-
- if (offset < 0) {
- nat_blk->entries[nid - start_nid] = raw_ne;
- } else {
- nat_in_journal(sum, offset) = raw_ne;
- nid_in_journal(sum, offset) = cpu_to_le32(nid);
- }
+ /* flush dirty nats in nat entry set */
+ list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
+ struct f2fs_nat_entry *raw_ne;
+ nid_t nid = nat_get_nid(ne);
+ int offset;
+
+ if (to_journal) {
+ offset = lookup_journal_in_cursum(sum,
+ NAT_JOURNAL, nid, 1);
+ f2fs_bug_on(offset < 0);
+ raw_ne = &nat_in_journal(sum, offset);
+ nid_in_journal(sum, offset) = cpu_to_le32(nid);
+ } else {
+ raw_ne = &nat_blk->entries[nid - start_nid];
+ }
+ raw_nat_from_node_info(raw_ne, &ne->ni);
- if (nat_get_blkaddr(ne) == NULL_ADDR &&
+ if (nat_get_blkaddr(ne) == NULL_ADDR &&
add_free_nid(sbi, nid, false) <= 0) {
- write_lock(&nm_i->nat_tree_lock);
- __del_from_nat_cache(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
- } else {
- write_lock(&nm_i->nat_tree_lock);
- __clear_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
+ write_lock(&nm_i->nat_tree_lock);
+ __del_from_nat_cache(nm_i, ne);
+ write_unlock(&nm_i->nat_tree_lock);
+ } else {
+ write_lock(&nm_i->nat_tree_lock);
+ __clear_nat_cache_dirty(nm_i, ne);
+ write_unlock(&nm_i->nat_tree_lock);
+ }
}
+
+ if (to_journal)
+ mutex_unlock(&curseg->curseg_mutex);
+ else
+ f2fs_put_page(page, 1);
+
+ release_nat_entry_set(nes, nm_i);
}
- if (!flushed)
- mutex_unlock(&curseg->curseg_mutex);
- f2fs_put_page(page, 1);
+
+ f2fs_bug_on(!list_empty(head));
+ f2fs_bug_on(nm_i->dirty_nat_cnt);
}
static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1894,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
+ INIT_LIST_HEAD(&nm_i->nat_entry_set);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1974,19 +2058,30 @@ int __init create_node_manager_caches(void)
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
sizeof(struct nat_entry));
if (!nat_entry_slab)
- return -ENOMEM;
+ goto fail;
free_nid_slab = f2fs_kmem_cache_create("free_nid",
sizeof(struct free_nid));
- if (!free_nid_slab) {
- kmem_cache_destroy(nat_entry_slab);
- return -ENOMEM;
- }
+ if (!free_nid_slab)
+ goto destory_nat_entry;
+
+ nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
+ sizeof(struct nat_entry_set));
+ if (!nat_entry_set_slab)
+ goto destory_free_nid;
return 0;
+
+destory_free_nid:
+ kmem_cache_destroy(free_nid_slab);
+destory_nat_entry:
+ kmem_cache_destroy(nat_entry_slab);
+fail:
+ return -ENOMEM;
}
void destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
DIRTY_DENTS /* indicates dirty dentry pages */
};
+struct nat_entry_set {
+ struct list_head set_list; /* link with all nat sets */
+ struct list_head entry_list; /* link with dirty nat entries */
+ nid_t start_nid; /* start nid of nats in set */
+ unsigned int entry_cnt; /* the # of nat entries in set */
+};
+
/*
* For free nid mangement
*/
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368a4a86..fe1c6d921ba2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct node_info ni;
int err = 0, recovered = 0;
+ recover_inline_xattr(inode, page);
+
if (recover_inline_data(inode, page))
goto out;
@@ -434,7 +436,9 @@ next:
int recover_fsync_data(struct f2fs_sb_info *sbi)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
+ block_t blkaddr;
int err;
bool need_writecp = false;
@@ -447,6 +451,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
/* step #1: find fsynced inode numbers */
sbi->por_doing = true;
+
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
@@ -462,8 +469,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
+
+ if (err) {
+ truncate_inode_pages_final(NODE_MAPPING(sbi));
+ truncate_inode_pages_final(META_MAPPING(sbi));
+ }
+
sbi->por_doing = false;
- if (!err && need_writecp)
+ if (err) {
+ discard_next_dnode(sbi, blkaddr);
+
+ /* Flush all the NAT/SIT pages */
+ while (get_pages(sbi, F2FS_DIRTY_META))
+ sync_meta_pages(sbi, META, LONG_MAX);
+ } else if (need_writecp) {
write_checkpoint(sbi, false);
+ }
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f25f0e07e26f..0dfeebae2a50 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -239,6 +239,12 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
struct flush_cmd cmd;
+ trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
+ test_opt(sbi, FLUSH_MERGE));
+
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
if (!test_opt(sbi, FLUSH_MERGE))
return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
@@ -272,27 +278,27 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
return -ENOMEM;
spin_lock_init(&fcc->issue_lock);
init_waitqueue_head(&fcc->flush_wait_queue);
+ SM_I(sbi)->cmd_control_info = fcc;
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
kfree(fcc);
+ SM_I(sbi)->cmd_control_info = NULL;
return err;
}
- sbi->sm_info->cmd_control_info = fcc;
return err;
}
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
{
- struct flush_cmd_control *fcc =
- sbi->sm_info->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
if (fcc && fcc->f2fs_issue_flush)
kthread_stop(fcc->f2fs_issue_flush);
kfree(fcc);
- sbi->sm_info->cmd_control_info = NULL;
+ SM_I(sbi)->cmd_control_info = NULL;
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -376,11 +382,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}
-void discard_next_dnode(struct f2fs_sb_info *sbi)
+void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
if (f2fs_issue_discard(sbi, blkaddr, 1)) {
struct page *page = grab_meta_page(sbi, blkaddr);
/* zero-filled page */
@@ -436,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int segno = -1;
+ unsigned int segno;
unsigned int total_segs = TOTAL_SEGS(sbi);
mutex_lock(&dirty_i->seglist_lock);
- while (1) {
- segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
- segno + 1);
- if (segno >= total_segs)
- break;
+ for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)
__set_test_and_free(sbi, segno);
- }
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -973,14 +971,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
- unsigned int old_cursegno;
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
- old_cursegno = curseg->segno;
/*
* __add_sum_entry should be resided under the curseg_mutex
@@ -1001,7 +997,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
- locate_dirty_segment(sbi, old_cursegno);
mutex_unlock(&sit_i->sentry_lock);
@@ -1531,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
struct page *page = NULL;
struct f2fs_sit_block *raw_sit = NULL;
unsigned int start = 0, end = 0;
- unsigned int segno = -1;
+ unsigned int segno;
bool flushed;
mutex_lock(&curseg->curseg_mutex);
@@ -1543,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
*/
flushed = flush_sits_in_journal(sbi);
- while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
+ for_each_set_bit(segno, bitmap, nsegs) {
struct seg_entry *se = get_seg_entry(sbi, segno);
int sit_offset, offset;
@@ -1702,7 +1697,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;
- array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
+ array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
if (!array)
return -ENOMEM;
@@ -1885,8 +1880,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
/* init sm info */
sbi->sm_info = sm_info;
- INIT_LIST_HEAD(&sm_info->wblist_head);
- spin_lock_init(&sm_info->wblist_lock);
sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204680f4..55973f7b0330 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
- next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi),
- start_segno);
+ next = find_next_bit(free_i->free_segmap,
+ start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
@@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
if (S_ISDIR(inode->i_mode))
return false;
+ /* this is only set during fdatasync */
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
+ return true;
+
switch (SM_I(sbi)->ipu_policy) {
case F2FS_IPU_FORCE:
return true;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b2b18637cb9e..657582fc7601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@ enum {
Opt_inline_xattr,
Opt_inline_data,
Opt_flush_merge,
+ Opt_nobarrier,
Opt_err,
};
@@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_flush_merge, "flush_merge"},
+ {Opt_nobarrier, "nobarrier"},
{Opt_err, NULL},
};
@@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
+ case Opt_nobarrier:
+ set_opt(sbi, NOBARRIER);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",inline_data");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
+ if (test_opt(sbi, NOBARRIER))
+ seq_puts(seq, ",nobarrier");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -615,7 +622,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* Previous and new state of filesystem is RO,
* so skip checking GC and FLUSH_MERGE conditions.
*/
- if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
+ if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
goto skip;
/*
@@ -642,8 +649,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
destroy_flush_cmd_control(sbi);
- } else if (test_opt(sbi, FLUSH_MERGE) &&
- !sbi->sm_info->cmd_control_info) {
+ } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
err = create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
@@ -689,9 +695,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- if (unlikely(ino < F2FS_ROOT_INO(sbi)))
- return ERR_PTR(-ESTALE);
- if (unlikely(ino >= NM_I(sbi)->max_nid))
+ if (check_nid_range(sbi, ino))
return ERR_PTR(-ESTALE);
/*
@@ -949,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
- mutex_init(&sbi->node_write);
+ init_rwsem(&sbi->node_write);
sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
@@ -999,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->dir_inode_list);
spin_lock_init(&sbi->dir_inode_lock);
- init_orphan_info(sbi);
+ init_ino_entry_info(sbi);
/* setup f2fs internal modules */
err = build_segment_manager(sbi);
@@ -1036,8 +1040,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_node_inode;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ iput(root);
err = -EINVAL;
- goto free_root_inode;
+ goto free_node_inode;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -1084,7 +1089,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72c82f69b01b..22d1c3df61ac 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -21,6 +21,7 @@
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
+#include <linux/shmem_fs.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_GETPIPE_SZ:
err = pipe_fcntl(filp, cmd, arg);
break;
+ case F_ADD_SEALS:
+ case F_GET_SEALS:
+ err = shmem_fcntl(filp, cmd, arg);
+ break;
default:
break;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be568b7311d6..ef9bef118342 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -342,7 +342,8 @@ static void __inode_wait_for_writeback(struct inode *inode)
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
while (inode->i_state & I_SYNC) {
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+ __wait_on_bit(wqh, &wq, bit_wait,
+ TASK_UNINTERRUPTIBLE);
spin_lock(&inode->i_lock);
}
}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index aec01be91b0a..89acec742e0b 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -160,7 +160,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie,
_enter("%p", cookie);
wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
goto out_unlock;
@@ -255,7 +255,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
if (!fscache_defer_lookup) {
_debug("non-deferred lookup %p", &cookie->flags);
wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
_debug("complete");
if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
goto unavailable;
@@ -463,7 +463,6 @@ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
_enter("%p", cookie);
wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
- fscache_wait_bit_interruptible,
TASK_UNINTERRUPTIBLE);
_leave("");
@@ -525,7 +524,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
}
wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
goto out_unlock_enable;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index bc6c08fcfddd..7872a62ef30c 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
}
-extern int fscache_wait_bit(void *);
-extern int fscache_wait_bit_interruptible(void *);
extern int fscache_wait_atomic_t(atomic_t *);
/*
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 63f868e869b9..b39d487ccfb0 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
return ret;
}
-struct ctl_table fscache_sysctls[] = {
+static struct ctl_table fscache_sysctls[] = {
{
.procname = "object_max_active",
.data = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
{}
};
-struct ctl_table fscache_sysctls_root[] = {
+static struct ctl_table fscache_sysctls_root[] = {
{
.procname = "fscache",
.mode = 0555,
@@ -197,24 +197,6 @@ static void __exit fscache_exit(void)
module_exit(fscache_exit);
/*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-int fscache_wait_bit(void *flags)
-{
- schedule();
- return 0;
-}
-
-/*
- * wait_on_bit() sleep function for interruptible waiting
- */
-int fscache_wait_bit_interruptible(void *flags)
-{
- schedule();
- return signal_pending(current);
-}
-
-/*
* wait_on_atomic_t() sleep function for uninterruptible waiting
*/
int fscache_wait_atomic_t(atomic_t *p)
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index ed70714503fa..85332b9d19d1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -298,7 +298,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
jif = jiffies;
if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
- fscache_wait_bit_interruptible,
TASK_INTERRUPTIBLE) != 0) {
fscache_stat(&fscache_n_retrievals_intr);
_leave(" = -ERESTARTSYS");
@@ -342,7 +341,6 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
if (stat_op_waits)
fscache_stat(stat_op_waits);
if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
- fscache_wait_bit_interruptible,
TASK_INTERRUPTIBLE) != 0) {
ret = fscache_cancel_op(op, do_cancel);
if (ret == 0)
@@ -351,7 +349,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
/* it's been removed from the pending queue by another party,
* so we should get to run shortly */
wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
}
_debug("<<< GO");
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 098f97bdcf1b..ca887314aba9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -643,9 +643,8 @@ struct fuse_copy_state {
unsigned long seglen;
unsigned long addr;
struct page *pg;
- void *mapaddr;
- void *buf;
unsigned len;
+ unsigned offset;
unsigned move_pages:1;
};
@@ -666,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
if (cs->currbuf) {
struct pipe_buffer *buf = cs->currbuf;
- if (!cs->write) {
- kunmap_atomic(cs->mapaddr);
- } else {
- kunmap_atomic(cs->mapaddr);
+ if (cs->write)
buf->len = PAGE_SIZE - cs->len;
- }
cs->currbuf = NULL;
- cs->mapaddr = NULL;
- } else if (cs->mapaddr) {
- kunmap_atomic(cs->mapaddr);
+ } else if (cs->pg) {
if (cs->write) {
flush_dcache_page(cs->pg);
set_page_dirty_lock(cs->pg);
}
put_page(cs->pg);
- cs->mapaddr = NULL;
}
+ cs->pg = NULL;
}
/*
@@ -691,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
*/
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
- unsigned long offset;
+ struct page *page;
int err;
unlock_request(cs->fc, cs->req);
@@ -706,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
- cs->mapaddr = kmap_atomic(buf->page);
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
cs->len = buf->len;
- cs->buf = cs->mapaddr + buf->offset;
cs->pipebufs++;
cs->nr_segs--;
} else {
- struct page *page;
-
if (cs->nr_segs == cs->pipe->buffers)
return -EIO;
@@ -726,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
buf->len = 0;
cs->currbuf = buf;
- cs->mapaddr = kmap_atomic(page);
- cs->buf = cs->mapaddr;
+ cs->pg = page;
+ cs->offset = 0;
cs->len = PAGE_SIZE;
cs->pipebufs++;
cs->nr_segs++;
@@ -740,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->iov++;
cs->nr_segs--;
}
- err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
+ err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
if (err < 0)
return err;
BUG_ON(err != 1);
- offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap_atomic(cs->pg);
- cs->buf = cs->mapaddr + offset;
- cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->pg = page;
+ cs->offset = cs->addr % PAGE_SIZE;
+ cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
cs->seglen -= cs->len;
cs->addr += cs->len;
}
@@ -760,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
unsigned ncpy = min(*size, cs->len);
if (val) {
+ void *pgaddr = kmap_atomic(cs->pg);
+ void *buf = pgaddr + cs->offset;
+
if (cs->write)
- memcpy(cs->buf, *val, ncpy);
+ memcpy(buf, *val, ncpy);
else
- memcpy(*val, cs->buf, ncpy);
+ memcpy(*val, buf, ncpy);
+
+ kunmap_atomic(pgaddr);
*val += ncpy;
}
*size -= ncpy;
cs->len -= ncpy;
- cs->buf += ncpy;
+ cs->offset += ncpy;
return ncpy;
}
@@ -874,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
out_fallback_unlock:
unlock_page(newpage);
out_fallback:
- cs->mapaddr = kmap_atomic(buf->page);
- cs->buf = cs->mapaddr + buf->offset;
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
err = lock_request(cs->fc, cs->req);
if (err)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 42198359fa1b..0c6048247a34 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -198,7 +198,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
inode = ACCESS_ONCE(entry->d_inode);
if (inode && is_bad_inode(inode))
goto invalid;
- else if (fuse_dentry_time(entry) < get_jiffies_64()) {
+ else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
+ (flags & LOOKUP_REVAL)) {
int err;
struct fuse_entry_out outarg;
struct fuse_req *req;
@@ -814,13 +815,6 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
return err;
}
-static int fuse_rename(struct inode *olddir, struct dentry *oldent,
- struct inode *newdir, struct dentry *newent)
-{
- return fuse_rename_common(olddir, oldent, newdir, newent, 0,
- FUSE_RENAME, sizeof(struct fuse_rename_in));
-}
-
static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
struct inode *newdir, struct dentry *newent,
unsigned int flags)
@@ -831,17 +825,30 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
return -EINVAL;
- if (fc->no_rename2 || fc->minor < 23)
- return -EINVAL;
+ if (flags) {
+ if (fc->no_rename2 || fc->minor < 23)
+ return -EINVAL;
- err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
- FUSE_RENAME2, sizeof(struct fuse_rename2_in));
- if (err == -ENOSYS) {
- fc->no_rename2 = 1;
- err = -EINVAL;
+ err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
+ FUSE_RENAME2,
+ sizeof(struct fuse_rename2_in));
+ if (err == -ENOSYS) {
+ fc->no_rename2 = 1;
+ err = -EINVAL;
+ }
+ } else {
+ err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
+ FUSE_RENAME,
+ sizeof(struct fuse_rename_in));
}
+
return err;
+}
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent)
+{
+ return fuse_rename2(olddir, oldent, newdir, newent, 0);
}
static int fuse_link(struct dentry *entry, struct inode *newdir,
@@ -985,7 +992,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
int err;
bool r;
- if (fi->i_time < get_jiffies_64()) {
+ if (time_before64(fi->i_time, get_jiffies_64())) {
r = true;
err = fuse_do_getattr(inode, stat, file);
} else {
@@ -1171,7 +1178,7 @@ static int fuse_permission(struct inode *inode, int mask)
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
struct fuse_inode *fi = get_fuse_inode(inode);
- if (fi->i_time < get_jiffies_64()) {
+ if (time_before64(fi->i_time, get_jiffies_64())) {
refreshed = true;
err = fuse_perm_getattr(inode, mask);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6e16dad13e9b..40ac2628ddcf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1687,7 +1687,7 @@ static int fuse_writepage_locked(struct page *page)
error = -EIO;
req->ff = fuse_write_file_get(fc, fi);
if (!req->ff)
- goto err_free;
+ goto err_nofile;
fuse_write_fill(req, req->ff, page_offset(page), 0);
@@ -1715,6 +1715,8 @@ static int fuse_writepage_locked(struct page *page)
return 0;
+err_nofile:
+ __free_page(tmp_page);
err_free:
fuse_request_free(req);
err:
@@ -1955,8 +1957,8 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL;
err = -ENOMEM;
- data.orig_pages = kzalloc(sizeof(struct page *) *
- FUSE_MAX_PAGES_PER_REQ,
+ data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
+ sizeof(struct page *),
GFP_NOFS);
if (!data.orig_pages)
goto out;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 754dcf23de8a..03246cd9d47a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -478,6 +478,17 @@ static const match_table_t tokens = {
{OPT_ERR, NULL}
};
+static int fuse_match_uint(substring_t *s, unsigned int *res)
+{
+ int err = -ENOMEM;
+ char *buf = match_strdup(s);
+ if (buf) {
+ err = kstrtouint(buf, 10, res);
+ kfree(buf);
+ }
+ return err;
+}
+
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
{
char *p;
@@ -488,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
+ unsigned uv;
substring_t args[MAX_OPT_ARGS];
if (!*p)
continue;
@@ -511,18 +523,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_USER_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->user_id = make_kuid(current_user_ns(), value);
+ d->user_id = make_kuid(current_user_ns(), uv);
if (!uid_valid(d->user_id))
return 0;
d->user_id_present = 1;
break;
case OPT_GROUP_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->group_id = make_kgid(current_user_ns(), value);
+ d->group_id = make_kgid(current_user_ns(), uv);
if (!gid_valid(d->group_id))
return 0;
d->group_id_present = 1;
@@ -895,9 +907,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->writeback_cache = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
- else
- fc->sb->s_time_gran = 1000000000;
-
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
@@ -926,7 +935,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
- FUSE_WRITEBACK_CACHE;
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1006,7 +1015,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
- if (!parse_fuse_opt((char *) data, &d, is_bdev))
+ if (!parse_fuse_opt(data, &d, is_bdev))
goto err;
if (is_bdev) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4fc3a3046174..26b3f952e6b1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -981,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int error = 0;
state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
- flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
+ flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT;
mutex_lock(&fp->f_fl_mutex);
@@ -991,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
goto out;
flock_lock_file_wait(file,
&(struct file_lock){.fl_type = F_UNLCK});
- gfs2_glock_dq_wait(fl_gh);
+ gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c355f7320e44..7f513b1ceb2c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -731,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
cachep = gfs2_glock_aspace_cachep;
else
cachep = gfs2_glock_cachep;
- gl = kmem_cache_alloc(cachep, GFP_KERNEL);
+ gl = kmem_cache_alloc(cachep, GFP_NOFS);
if (!gl)
return -ENOMEM;
memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
if (glops->go_flags & GLOF_LVB) {
- gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL);
+ gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS);
if (!gl->gl_lksb.sb_lvbptr) {
kmem_cache_free(cachep, gl);
return -ENOMEM;
@@ -856,27 +856,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
}
/**
- * gfs2_glock_holder_wait
- * @word: unused
- *
- * This function and gfs2_glock_demote_wait both show up in the WCHAN
- * field. Thus I've separated these otherwise identical functions in
- * order to be more informative to the user.
- */
-
-static int gfs2_glock_holder_wait(void *word)
-{
- schedule();
- return 0;
-}
-
-static int gfs2_glock_demote_wait(void *word)
-{
- schedule();
- return 0;
-}
-
-/**
* gfs2_glock_wait - wait on a glock acquisition
* @gh: the glock holder
*
@@ -888,7 +867,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
unsigned long time1 = jiffies;
might_sleep();
- wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
/* Lengthen the minimum hold time. */
gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
@@ -1128,7 +1107,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
struct gfs2_glock *gl = gh->gh_gl;
gfs2_glock_dq(gh);
might_sleep();
- wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}
/**
@@ -1404,12 +1383,16 @@ __acquires(&lru_lock)
gl = list_entry(list->next, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
if (!spin_trylock(&gl->gl_spin)) {
+add_back_to_lru:
list_add(&gl->gl_lru, &lru_list);
atomic_inc(&lru_count);
continue;
}
+ if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+ spin_unlock(&gl->gl_spin);
+ goto add_back_to_lru;
+ }
clear_bit(GLF_LRU, &gl->gl_flags);
- spin_unlock(&lru_lock);
gl->gl_lockref.count++;
if (demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1417,7 +1400,7 @@ __acquires(&lru_lock)
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gl->gl_lockref.count--;
spin_unlock(&gl->gl_spin);
- spin_lock(&lru_lock);
+ cond_resched_lock(&lru_lock);
}
}
@@ -1442,7 +1425,7 @@ static long gfs2_scan_glock_lru(int nr)
gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
/* Test for being demotable */
- if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+ if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
list_move(&gl->gl_lru, &dispose);
atomic_dec(&lru_count);
freed++;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index fc1100781bbc..2ffc67dce87f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -234,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl)
* inode_go_inval - prepare a inode glock to be released
* @gl: the glock
* @flags:
- *
- * Normally we invlidate everything, but if we are moving into
+ *
+ * Normally we invalidate everything, but if we are moving into
* LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
* can keep hold of the metadata, since it won't have changed.
*
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 91f274de1246..641383a9c1bb 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -936,12 +936,6 @@ fail:
return error;
}
-static int dlm_recovery_wait(void *word)
-{
- schedule();
- return 0;
-}
-
static int control_first_done(struct gfs2_sbd *sdp)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -976,7 +970,7 @@ restart:
fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
- dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
goto restart;
}
@@ -1036,8 +1030,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
new_size = old_size + RECOVER_SIZE_INC;
- submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
- result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+ submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
+ result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
if (!submit || !result) {
kfree(submit);
kfree(result);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index bc564c0d6d16..d3eae244076e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1024,20 +1024,13 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
lm->lm_unmount(sdp);
}
-static int gfs2_journalid_wait(void *word)
-{
- if (signal_pending(current))
- return -EINTR;
- schedule();
- return 0;
-}
-
static int wait_on_journal(struct gfs2_sbd *sdp)
{
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
return 0;
- return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
+ return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE)
+ ? -EINTR : 0;
}
void gfs2_online_uevent(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 94555d4c5698..573bd3b758fa 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -591,12 +591,6 @@ done:
wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
}
-static int gfs2_recovery_wait(void *word)
-{
- schedule();
- return 0;
-}
-
int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
{
int rv;
@@ -609,7 +603,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
BUG_ON(!rv);
if (wait)
- wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
+ wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
TASK_UNINTERRUPTIBLE);
return wait ? jd->jd_recover_error : 0;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index db629d1bd1bd..f4cb9c0d6bbd 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -337,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
/**
* gfs2_free_extlen - Return extent length of free blocks
- * @rbm: Starting position
+ * @rrbm: Starting position
* @len: Max length to check
*
* Starting at the block specified by the rbm, see how many free blocks
@@ -2522,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
/**
* gfs2_rlist_free - free a resource group list
- * @list: the list of resource groups
+ * @rlist: the list of resource groups
*
*/
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 1319b5c4ec68..2607ff13d486 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -864,12 +864,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
return error;
}
-static int gfs2_umount_recovery_wait(void *word)
-{
- schedule();
- return 0;
-}
-
/**
* gfs2_put_super - Unmount the filesystem
* @sb: The VFS superblock
@@ -894,7 +888,7 @@ restart:
continue;
spin_unlock(&sdp->sd_jindex_spin);
wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
- gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
goto restart;
}
spin_unlock(&sdp->sd_jindex_spin);
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index f36fc010fccb..2923a7bd82ac 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -545,12 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
struct dnode *d1;
struct quad_buffer_head qbh1;
if (hpfs_sb(i->i_sb)->sb_chk)
- if (up != i->i_ino) {
- hpfs_error(i->i_sb,
- "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
- dno, up, (unsigned long)i->i_ino);
- return;
- }
+ if (up != i->i_ino) {
+ hpfs_error(i->i_sb,
+ "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
+ dno, up,
+ (unsigned long)i->i_ino);
+ return;
+ }
if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
d1->up = cpu_to_le32(up);
d1->root_dnode = 1;
@@ -1061,8 +1062,8 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
hpfs_brelse4(qbh);
if (hpfs_sb(s)->sb_chk)
if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) {
- kfree(name2);
- return NULL;
+ kfree(name2);
+ return NULL;
}
goto go_down;
}
diff --git a/fs/inode.c b/fs/inode.c
index 6eecb7ff0b9a..26753ba7b6d6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
+ atomic_set(&mapping->i_mmap_writable, 0);
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
@@ -1695,13 +1696,6 @@ int inode_needs_sync(struct inode *inode)
}
EXPORT_SYMBOL(inode_needs_sync);
-int inode_wait(void *word)
-{
- schedule();
- return 0;
-}
-EXPORT_SYMBOL(inode_wait);
-
/*
* If we try to find an inode in the inode hash while it is being
* deleted, we have to wait until the filesystem completes its
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..f311bf084015 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -158,8 +158,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
"zisofs: zisofs_inflate returned"
" %d, inode = %lu,"
" page idx = %d, bh idx = %d,"
- " avail_in = %d,"
- " avail_out = %d\n",
+ " avail_in = %ld,"
+ " avail_out = %ld\n",
zerr, inode->i_ino, curpage,
curbh, stream.avail_in,
stream.avail_out);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 38cfcf5f6fce..5f09370c90a8 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -763,12 +763,6 @@ static void warn_dirty_buffer(struct buffer_head *bh)
bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}
-static int sleep_on_shadow_bh(void *word)
-{
- io_schedule();
- return 0;
-}
-
/*
* If the buffer is already part of the current transaction, then there
* is nothing we need to do. If it is already part of a prior
@@ -906,8 +900,8 @@ repeat:
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
jbd_unlock_bh_state(bh);
- wait_on_bit(&bh->b_state, BH_Shadow,
- sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
+ wait_on_bit_io(&bh->b_state, BH_Shadow,
+ TASK_UNINTERRUPTIBLE);
goto repeat;
}
@@ -1588,9 +1582,12 @@ int jbd2_journal_stop(handle_t *handle)
* to perform a synchronous write. We do this to detect the
* case where a single process is doing a stream of sync
* writes. No point in waiting for joiners in that case.
+ *
+ * Setting max_batch_time to 0 disables this completely.
*/
pid = current->pid;
- if (handle->h_sync && journal->j_last_sync_writer != pid) {
+ if (handle->h_sync && journal->j_last_sync_writer != pid &&
+ journal->j_max_batch_time) {
u64 commit_time, trans_time;
journal->j_last_sync_writer = pid;
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 009ec0b5993d..2f7a3c090489 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -202,8 +202,7 @@ struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
} else {
acl = ERR_PTR(rc);
}
- if (value)
- kfree(value);
+ kfree(value);
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
return acl;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 0b9a1e44e833..5698dae5d92d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
- def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out);
- jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n",
+ def_strm.avail_in = min_t(unsigned long,
+ (*sourcelen-def_strm.total_in), def_strm.avail_out);
+ jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
def_strm.avail_in, def_strm.avail_out);
ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
- jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
+ jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
def_strm.avail_in, def_strm.avail_out,
def_strm.total_in, def_strm.total_out);
if (ret != Z_OK) {
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index ad0f2e2a1700..d72817ac51f6 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -756,8 +756,7 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
list_del(&xd->xindex);
- if (xd->xname)
- kfree(xd->xname);
+ kfree(xd->xname);
jffs2_free_xattr_datum(xd);
}
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3d37f607f97..4429d6d9217f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -39,6 +39,19 @@ struct kernfs_open_node {
struct list_head files; /* goes through kernfs_open_file.list */
};
+/*
+ * kernfs_notify() may be called from any context and bounces notifications
+ * through a work item. To minimize space overhead in kernfs_node, the
+ * pending queue is implemented as a singly linked list of kernfs_nodes.
+ * The list is terminated with the self pointer so that whether a
+ * kernfs_node is on the list or not can be determined by testing the next
+ * pointer for NULL.
+ */
+#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
+
+static DEFINE_SPINLOCK(kernfs_notify_lock);
+static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+
static struct kernfs_open_file *kernfs_of(struct file *file)
{
return ((struct seq_file *)file->private_data)->private;
@@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
return DEFAULT_POLLMASK|POLLERR|POLLPRI;
}
-/**
- * kernfs_notify - notify a kernfs file
- * @kn: file to notify
- *
- * Notify @kn such that poll(2) on @kn wakes up.
- */
-void kernfs_notify(struct kernfs_node *kn)
+static void kernfs_notify_workfn(struct work_struct *work)
{
- struct kernfs_root *root = kernfs_root(kn);
+ struct kernfs_node *kn;
struct kernfs_open_node *on;
struct kernfs_super_info *info;
- unsigned long flags;
-
- if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+repeat:
+ /* pop one off the notify_list */
+ spin_lock_irq(&kernfs_notify_lock);
+ kn = kernfs_notify_list;
+ if (kn == KERNFS_NOTIFY_EOL) {
+ spin_unlock_irq(&kernfs_notify_lock);
return;
+ }
+ kernfs_notify_list = kn->attr.notify_next;
+ kn->attr.notify_next = NULL;
+ spin_unlock_irq(&kernfs_notify_lock);
/* kick poll */
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
+ spin_lock_irq(&kernfs_open_node_lock);
on = kn->attr.open;
if (on) {
@@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn)
wake_up_interruptible(&on->poll);
}
- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+ spin_unlock_irq(&kernfs_open_node_lock);
/* kick fsnotify */
mutex_lock(&kernfs_mutex);
- list_for_each_entry(info, &root->supers, node) {
+ list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
struct inode *inode;
struct dentry *dentry;
@@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn)
}
mutex_unlock(&kernfs_mutex);
+ kernfs_put(kn);
+ goto repeat;
+}
+
+/**
+ * kernfs_notify - notify a kernfs file
+ * @kn: file to notify
+ *
+ * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any
+ * context.
+ */
+void kernfs_notify(struct kernfs_node *kn)
+{
+ static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
+ unsigned long flags;
+
+ if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+ return;
+
+ spin_lock_irqsave(&kernfs_notify_lock, flags);
+ if (!kn->attr.notify_next) {
+ kernfs_get(kn);
+ kn->attr.notify_next = kernfs_notify_list;
+ kernfs_notify_list = kn;
+ schedule_work(&kernfs_notify_work);
+ }
+ spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);
@@ -855,7 +896,7 @@ const struct file_operations kernfs_file_fops = {
* @ops: kernfs operations for the file
* @priv: private data for the file
* @ns: optional namespace tag of the file
- * @static_name: don't copy file name
+ * @name_is_static: don't copy file name
* @key: lockdep key for the file's active_ref, %NULL to disable lockdep
*
* Returns the created node on success, ERR_PTR() value on error.
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d171b98a6cdd..f973ae9b05f1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -211,6 +211,36 @@ void kernfs_kill_sb(struct super_block *sb)
kernfs_put(root_kn);
}
+/**
+ * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
+ * @kernfs_root: the kernfs_root in question
+ * @ns: the namespace tag
+ *
+ * Pin the superblock so the superblock won't be destroyed in subsequent
+ * operations. This can be used to block ->kill_sb() which may be useful
+ * for kernfs users which dynamically manage superblocks.
+ *
+ * Returns NULL if there's no superblock associated to this kernfs_root, or
+ * -EINVAL if the superblock is being freed.
+ */
+struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
+{
+ struct kernfs_super_info *info;
+ struct super_block *sb = NULL;
+
+ mutex_lock(&kernfs_mutex);
+ list_for_each_entry(info, &root->supers, node) {
+ if (info->ns == ns) {
+ sb = info->sb;
+ if (!atomic_inc_not_zero(&info->sb->s_active))
+ sb = ERR_PTR(-EINVAL);
+ break;
+ }
+ }
+ mutex_unlock(&kernfs_mutex);
+ return sb;
+}
+
void __init kernfs_init(void)
{
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..daa8e7514eae 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -306,11 +306,9 @@ static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
static void nsm_init_private(struct nsm_handle *nsm)
{
u64 *p = (u64 *)&nsm->sm_priv.data;
- struct timespec ts;
s64 ns;
- ktime_get_ts(&ts);
- ns = timespec_to_ns(&ts);
+ ns = ktime_get_ns();
put_unaligned(ns, p);
put_unaligned((unsigned long)nsm, p + 1);
}
diff --git a/fs/locks.c b/fs/locks.c
index 717fbc404e6b..a6f54802d277 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -325,7 +325,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
return -ENOMEM;
fl->fl_file = filp;
- fl->fl_owner = (fl_owner_t)filp;
+ fl->fl_owner = filp;
fl->fl_pid = current->tgid;
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
@@ -431,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
if (assign_type(fl, type) != 0)
return -EINVAL;
- fl->fl_owner = (fl_owner_t)current->files;
+ fl->fl_owner = current->files;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
@@ -1155,7 +1155,6 @@ EXPORT_SYMBOL(posix_lock_file_wait);
int locks_mandatory_locked(struct file *file)
{
struct inode *inode = file_inode(file);
- fl_owner_t owner = current->files;
struct file_lock *fl;
/*
@@ -1165,7 +1164,8 @@ int locks_mandatory_locked(struct file *file)
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!IS_POSIX(fl))
continue;
- if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)
+ if (fl->fl_owner != current->files &&
+ fl->fl_owner != file)
break;
}
spin_unlock(&inode->i_lock);
@@ -1205,7 +1205,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
for (;;) {
if (filp) {
- fl.fl_owner = (fl_owner_t)filp;
+ fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
error = __posix_lock_file(inode, &fl, NULL);
if (!error)
@@ -1948,7 +1948,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
cmd = F_GETLK;
file_lock.fl_flags |= FL_OFDLCK;
- file_lock.fl_owner = (fl_owner_t)filp;
+ file_lock.fl_owner = filp;
}
error = vfs_test_lock(filp, &file_lock);
@@ -2103,7 +2103,7 @@ again:
cmd = F_SETLK;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2112,7 +2112,7 @@ again:
cmd = F_SETLKW;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
/* Fallthrough */
case F_SETLKW:
file_lock->fl_flags |= FL_SLEEP;
@@ -2170,7 +2170,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
cmd = F_GETLK64;
file_lock.fl_flags |= FL_OFDLCK;
- file_lock.fl_owner = (fl_owner_t)filp;
+ file_lock.fl_owner = filp;
}
error = vfs_test_lock(filp, &file_lock);
@@ -2242,7 +2242,7 @@ again:
cmd = F_SETLK64;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2251,7 +2251,7 @@ again:
cmd = F_SETLKW64;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
/* Fallthrough */
case F_SETLKW64:
file_lock->fl_flags |= FL_SLEEP;
@@ -2324,11 +2324,11 @@ void locks_remove_file(struct file *filp)
if (!inode->i_flock)
return;
- locks_remove_posix(filp, (fl_owner_t)filp);
+ locks_remove_posix(filp, filp);
if (filp->f_op->flock) {
struct file_lock fl = {
- .fl_owner = (fl_owner_t)filp,
+ .fl_owner = filp,
.fl_pid = current->tgid,
.fl_file = filp,
.fl_flags = FL_FLOCK,
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 48140315f627..380d86e1ab45 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
/**
* logfs_is_valid_block - check whether this block is still valid
*
- * @sb - superblock
- * @ofs - block physical offset
- * @ino - block inode number
- * @bix - block index
- * @level - block level
+ * @sb: superblock
+ * @ofs: block physical offset
+ * @ino: block inode number
+ * @bix: block index
+ * @gc_level: block level
*
* Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
* become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
*
* @inode: parent inode (ifile or directory)
* @buf: object to write (inode or dentry)
- * @n: object size
- * @_pos: object number (file position in blocks/objects)
+ * @count: object size
+ * @bix: block index
* @flags: write flags
- * @lock: 0 if write lock is already taken, 1 otherwise
* @shadow_tree: shadow below this inode
*
* FIXME: All caller of this put a 200-300 byte variable on the stack,
diff --git a/fs/mbcache.c b/fs/mbcache.c
index bf166e388f0d..187477ded6b3 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -73,6 +73,7 @@
#include <linux/mbcache.h>
#include <linux/init.h>
#include <linux/blockgroup_lock.h>
+#include <linux/log2.h>
#ifdef MB_CACHE_DEBUG
# define mb_debug(f...) do { \
@@ -93,7 +94,7 @@
#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
-#define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS)
+#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
(hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 4bc50dac8e97..742942a983be 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -96,7 +96,7 @@ int minix_new_block(struct inode * inode)
unsigned long minix_count_free_blocks(struct super_block *sb)
{
struct minix_sb_info *sbi = minix_sb(sb);
- u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1);
+ u32 bits = sbi->s_nzones - sbi->s_firstdatazone + 1;
return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
<< sbi->s_log_zone_size);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index f007a3355570..3f57af196a7d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -267,12 +267,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
if (sbi->s_imap_blocks < block) {
printk("MINIX-fs: file system does not have enough "
- "imap blocks allocated. Refusing to mount\n");
+ "imap blocks allocated. Refusing to mount.\n");
goto out_no_bitmap;
}
block = minix_blocks_needed(
- (sbi->s_nzones - (sbi->s_firstdatazone + 1)),
+ (sbi->s_nzones - sbi->s_firstdatazone + 1),
s->s_blocksize);
if (sbi->s_zmap_blocks < block) {
printk("MINIX-fs: file system does not have enough "
diff --git a/fs/namei.c b/fs/namei.c
index 985c6f368485..9eb787e5c167 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2256,9 +2256,10 @@ done:
goto out;
}
path->dentry = dentry;
- path->mnt = mntget(nd->path.mnt);
+ path->mnt = nd->path.mnt;
if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
return 1;
+ mntget(path->mnt);
follow_mount(path);
error = 0;
out:
diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..2a1447c946e7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
if (shadows)
- hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+ hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
else
hlist_add_head_rcu(&mnt->mnt_hash,
m_hash(&parent->mnt, mnt->mnt_mountpoint));
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8f98138cbc43..f11b9eed0de1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -756,7 +756,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
- bool do_destroy = true;
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
@@ -765,7 +764,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
case NFS_IOHDR_NEED_COMMIT:
kref_get(&req->wb_kref);
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
- do_destroy = false;
}
nfs_unlock_and_release_request(req);
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4042ff58fe3f..524dd80d1898 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -361,8 +361,8 @@ start:
* Prevent starvation issues if someone is doing a consistency
* sync-to-disk
*/
- ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 44bf0140a4c7..e2a0361e24c6 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
might_sleep();
- wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
}
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 567983d2c0eb..7dd55b745c4d 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -174,7 +174,9 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
static struct key_type key_type_id_resolver = {
.name = "id_resolver",
- .instantiate = user_instantiate,
+ .preparse = user_preparse,
+ .free_preparse = user_free_preparse,
+ .instantiate = generic_key_instantiate,
.match = user_match,
.revoke = user_revoke,
.destroy = user_destroy,
@@ -282,6 +284,8 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
desc, "", 0, idmap);
mutex_unlock(&idmap->idmap_mutex);
}
+ if (!IS_ERR(rkey))
+ set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
kfree(desc);
return rkey;
@@ -394,7 +398,9 @@ static const struct rpc_pipe_ops idmap_upcall_ops = {
static struct key_type key_type_id_resolver_legacy = {
.name = "id_legacy",
- .instantiate = user_instantiate,
+ .preparse = user_preparse,
+ .free_preparse = user_free_preparse,
+ .instantiate = generic_key_instantiate,
.match = user_match,
.revoke = user_revoke,
.destroy = user_destroy,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c496f8a74639..abd37a380535 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
* nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
* @word: long word containing the bit lock
*/
-int nfs_wait_bit_killable(void *word)
+int nfs_wait_bit_killable(struct wait_bit_key *key)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
@@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping)
return ret;
}
+static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= flags;
+ if (flags & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
+}
+
/*
* Invalidate the local caches
*/
@@ -162,17 +173,16 @@ static void nfs_zap_caches_locked(struct inode *inode)
memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
- nfs_fscache_invalidate(inode);
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
- | NFS_INO_REVAL_PAGECACHE;
+ | NFS_INO_REVAL_PAGECACHE);
} else
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
- | NFS_INO_REVAL_PAGECACHE;
+ | NFS_INO_REVAL_PAGECACHE);
nfs_zap_label_cache_locked(nfsi);
}
@@ -187,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
{
if (mapping->nrpages != 0) {
spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
- nfs_fscache_invalidate(inode);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
spin_unlock(&inode->i_lock);
}
}
@@ -209,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_invalidate_atime);
@@ -369,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_mode = fattr->mode;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
&& nfs_server_capable(inode, NFS_CAP_MODE))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
@@ -415,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
else if (nfs_server_capable(inode, NFS_CAP_ATIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode->i_version = fattr->change_attr;
else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
else
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_REVAL_PAGECACHE;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_PAGECACHE);
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
else if (nfs_server_capable(inode, NFS_CAP_NLINK))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -550,6 +559,9 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
spin_lock(&inode->i_lock);
i_size_write(inode, offset);
+ /* Optimisation */
+ if (offset == 0)
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
@@ -578,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
inode->i_uid = attr->ia_uid;
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL);
spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
@@ -1061,8 +1074,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
* the bit lock here if it looks like we're going to be doing that.
*/
for (;;) {
- ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
goto out;
spin_lock(&inode->i_lock);
@@ -1101,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
&& inode->i_version == fattr->pre_change_attr) {
inode->i_version = fattr->change_attr;
if (S_ISDIR(inode->i_mode))
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
ret |= NFS_INO_INVALID_ATTR;
}
/* If we have atomic WCC data, we may update some attributes */
@@ -1117,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
&& timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
if (S_ISDIR(inode->i_mode))
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
ret |= NFS_INO_INVALID_ATTR;
}
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
@@ -1128,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
ret |= NFS_INO_INVALID_ATTR;
}
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
-
return ret;
}
@@ -1189,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0)
- nfsi->cache_validity |= invalid;
+ nfs_set_cache_invalid(inode, invalid);
nfsi->read_cache_jiffies = fattr->time_start;
return 0;
@@ -1402,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
- struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
- if (S_ISDIR(inode->i_mode)) {
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
- nfs_fscache_invalidate(inode);
- }
+ if (S_ISDIR(inode->i_mode))
+ invalid |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, invalid);
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
return nfs_refresh_inode_locked(inode, fattr);
@@ -1601,6 +1609,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if ((nfsi->npages == 0) || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+ invalid &= ~NFS_INO_REVAL_PAGECACHE;
}
dprintk("NFS: isize change on server for file %s/%ld "
"(%Ld to %Ld)\n",
@@ -1702,10 +1711,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid &= ~NFS_INO_INVALID_DATA;
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) ||
(save_cache_validity & NFS_INO_REVAL_FORCED))
- nfsi->cache_validity |= invalid;
-
- if (invalid & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
+ nfs_set_cache_invalid(inode, invalid);
return 0;
out_err:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 82ddbf46660e..617f36611d4a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
const struct rpc_call_ops *, int, int);
+void nfs_free_request(struct nfs_page *req);
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
{
@@ -347,7 +348,7 @@ extern int nfs_drop_inode(struct inode *);
extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(void *word);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key);
/* super.c */
extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 871d6eda8dba..8f854dde4150 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -247,3 +247,46 @@ const struct xattr_handler *nfs3_xattr_handlers[] = {
&posix_acl_default_xattr_handler,
NULL,
};
+
+static int
+nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
+ size_t size, ssize_t *result)
+{
+ struct posix_acl *acl;
+ char *p = data + *result;
+
+ acl = get_acl(inode, type);
+ if (!acl)
+ return 0;
+
+ posix_acl_release(acl);
+
+ *result += strlen(name);
+ *result += 1;
+ if (!size)
+ return 0;
+ if (*result > size)
+ return -ERANGE;
+
+ strcpy(p, name);
+ return 0;
+}
+
+ssize_t
+nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ ssize_t result = 0;
+ int error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
+ POSIX_ACL_XATTR_ACCESS, data, size, &result);
+ if (error)
+ return error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
+ POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+ if (error)
+ return error;
+ return result;
+}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e7daa42bbc86..f0afa291fd58 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,7 +885,7 @@ static const struct inode_operations nfs3_dir_inode_operations = {
.getattr = nfs_getattr,
.setattr = nfs_setattr,
#ifdef CONFIG_NFS_V3_ACL
- .listxattr = generic_listxattr,
+ .listxattr = nfs3_listxattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
.removexattr = generic_removexattr,
@@ -899,7 +899,7 @@ static const struct inode_operations nfs3_file_inode_operations = {
.getattr = nfs_getattr,
.setattr = nfs_setattr,
#ifdef CONFIG_NFS_V3_ACL
- .listxattr = generic_listxattr,
+ .listxattr = nfs3_listxattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
.removexattr = generic_removexattr,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index f63cb87cd730..ba2affa51941 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -230,7 +230,7 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
-struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *);
struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
int nfs4_replace_transport(struct nfs_server *server,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3d5dbf80d46a..3d83cb1fdc70 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -139,16 +139,22 @@ static size_t nfs_parse_server_name(char *string, size_t len,
* @server: NFS server struct
* @flavors: List of security tuples returned by SECINFO procedure
*
- * Return the pseudoflavor of the first security mechanism in
- * "flavors" that is locally supported. Return RPC_AUTH_UNIX if
- * no matching flavor is found in the array. The "flavors" array
+ * Return an rpc client that uses the first security mechanism in
+ * "flavors" that is locally supported. The "flavors" array
* is searched in the order returned from the server, per RFC 3530
- * recommendation.
+ * recommendation and each flavor is checked for membership in the
+ * sec= mount option list if it exists.
+ *
+ * Return -EPERM if no matching flavor is found in the array.
+ *
+ * Please call rpc_shutdown_client() when you are done with this rpc client.
+ *
*/
-static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
+static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt,
+ struct nfs_server *server,
struct nfs4_secinfo_flavors *flavors)
{
- rpc_authflavor_t pseudoflavor;
+ rpc_authflavor_t pflavor;
struct nfs4_secinfo4 *secinfo;
unsigned int i;
@@ -159,62 +165,73 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
case RPC_AUTH_NULL:
case RPC_AUTH_UNIX:
case RPC_AUTH_GSS:
- pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
+ pflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
&secinfo->flavor_info);
- /* make sure pseudoflavor matches sec= mount opt */
- if (pseudoflavor != RPC_AUTH_MAXFLAVOR &&
- nfs_auth_info_match(&server->auth_info,
- pseudoflavor))
- return pseudoflavor;
- break;
+ /* does the pseudoflavor match a sec= mount opt? */
+ if (pflavor != RPC_AUTH_MAXFLAVOR &&
+ nfs_auth_info_match(&server->auth_info, pflavor)) {
+ struct rpc_clnt *new;
+ struct rpc_cred *cred;
+
+ /* Cloning creates an rpc_auth for the flavor */
+ new = rpc_clone_client_set_auth(clnt, pflavor);
+ if (IS_ERR(new))
+ continue;
+ /**
+ * Check that the user actually can use the
+ * flavor. This is mostly for RPC_AUTH_GSS
+ * where cr_init obtains a gss context
+ */
+ cred = rpcauth_lookupcred(new->cl_auth, 0);
+ if (IS_ERR(cred)) {
+ rpc_shutdown_client(new);
+ continue;
+ }
+ put_rpccred(cred);
+ return new;
+ }
}
}
-
- /* if there were any sec= options then nothing matched */
- if (server->auth_info.flavor_len > 0)
- return -EPERM;
-
- return RPC_AUTH_UNIX;
+ return ERR_PTR(-EPERM);
}
-static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
+/**
+ * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup,
+ * return an rpc_clnt that uses the best available security flavor with
+ * respect to the secinfo flavor list and the sec= mount options.
+ *
+ * @clnt: RPC client to clone
+ * @inode: directory inode
+ * @name: lookup name
+ *
+ * Please call rpc_shutdown_client() when you are done with this rpc client.
+ */
+struct rpc_clnt *
+nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode,
+ struct qstr *name)
{
struct page *page;
struct nfs4_secinfo_flavors *flavors;
- rpc_authflavor_t flavor;
+ struct rpc_clnt *new;
int err;
page = alloc_page(GFP_KERNEL);
if (!page)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
+
flavors = page_address(page);
err = nfs4_proc_secinfo(inode, name, flavors);
if (err < 0) {
- flavor = err;
+ new = ERR_PTR(err);
goto out;
}
- flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors);
+ new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors);
out:
put_page(page);
- return flavor;
-}
-
-/*
- * Please call rpc_shutdown_client() when you are done with this client.
- */
-struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
- struct qstr *name)
-{
- rpc_authflavor_t flavor;
-
- flavor = nfs4_negotiate_security(inode, name);
- if ((int)flavor < 0)
- return ERR_PTR((int)flavor);
-
- return rpc_clone_client_set_auth(clnt, flavor);
+ return new;
}
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
@@ -397,11 +414,6 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
if (client->cl_auth->au_flavor != flavor)
flavor = client->cl_auth->au_flavor;
- else {
- rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
- if ((int)new >= 0)
- flavor = new;
- }
mnt = nfs_do_submount(dentry, fh, fattr, flavor);
out:
rpc_shutdown_client(client);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 285ad5334018..4bf3d97cc5a0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3247,7 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
err = -EPERM;
if (client != *clnt)
goto out;
- client = nfs4_create_sec_client(client, dir, name);
+ client = nfs4_negotiate_security(client, dir, name);
if (IS_ERR(client))
return PTR_ERR(client);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 848f6853c59e..42f121182167 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
might_sleep();
atomic_inc(&clp->cl_count);
- res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
goto out;
if (clp->cl_cons_state < 0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b6ee3a6ee96d..0be5050638f7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,8 +29,6 @@
static struct kmem_cache *nfs_page_cachep;
static const struct rpc_call_ops nfs_pgio_common_ops;
-static void nfs_free_request(struct nfs_page *);
-
static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
{
p->npages = pagecount;
@@ -117,7 +115,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
set_bit(NFS_IO_INPROGRESS, &c->flags);
if (atomic_read(&c->io_count) == 0)
break;
- ret = nfs_wait_bit_killable(&c->flags);
+ ret = nfs_wait_bit_killable(&q.key);
} while (atomic_read(&c->io_count) != 0);
finish_wait(wq, &q.wait);
return ret;
@@ -138,12 +136,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
return __nfs_iocounter_wait(c);
}
-static int nfs_wait_bit_uninterruptible(void *word)
-{
- io_schedule();
- return 0;
-}
-
/*
* nfs_page_group_lock - lock the head of the page group
* @req - request in group that is to be locked
@@ -158,7 +150,6 @@ nfs_page_group_lock(struct nfs_page *req)
WARN_ON_ONCE(head != head->wb_head);
wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
- nfs_wait_bit_uninterruptible,
TASK_UNINTERRUPTIBLE);
}
@@ -239,20 +230,28 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
WARN_ON_ONCE(prev == req);
if (!prev) {
+ /* a head request */
req->wb_head = req;
req->wb_this_page = req;
} else {
+ /* a subrequest */
WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
req->wb_head = prev->wb_head;
req->wb_this_page = prev->wb_this_page;
prev->wb_this_page = req;
+ /* All subrequests take a ref on the head request until
+ * nfs_page_group_destroy is called */
+ kref_get(&req->wb_head->wb_kref);
+
/* grab extra ref if head request has extra ref from
* the write/commit path to handle handoff between write
* and commit lists */
- if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+ set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
+ }
}
}
@@ -269,6 +268,10 @@ nfs_page_group_destroy(struct kref *kref)
struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
struct nfs_page *tmp, *next;
+ /* subrequests must release the ref on the head request */
+ if (req->wb_head != req)
+ nfs_release_request(req->wb_head);
+
if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
return;
@@ -394,7 +397,7 @@ static void nfs_clear_request(struct nfs_page *req)
*
* Note: Should never be called with the spinlock held!
*/
-static void nfs_free_request(struct nfs_page *req)
+void nfs_free_request(struct nfs_page *req)
{
WARN_ON_ONCE(req->wb_this_page != req);
@@ -425,9 +428,8 @@ void nfs_release_request(struct nfs_page *req)
int
nfs_wait_on_request(struct nfs_page *req)
{
- return wait_on_bit(&req->wb_flags, PG_BUSY,
- nfs_wait_bit_uninterruptible,
- TASK_UNINTERRUPTIBLE);
+ return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+ TASK_UNINTERRUPTIBLE);
}
/*
@@ -925,7 +927,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
nfs_pageio_doio(desc);
if (desc->pg_error < 0)
return 0;
- desc->pg_moreio = 0;
if (desc->pg_recoalesce)
return 0;
/* retry add_request for this subreq */
@@ -972,6 +973,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
desc->pg_count = 0;
desc->pg_base = 0;
desc->pg_recoalesce = 0;
+ desc->pg_moreio = 0;
while (!list_empty(&head)) {
struct nfs_page *req;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..a8914b335617 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1885,7 +1885,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
if (!sync)
goto out;
- status = wait_on_bit_lock(&nfsi->flags,
+ status = wait_on_bit_lock_action(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
TASK_KILLABLE);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3ee5af4e738e..962c9ee758be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_clear_request_commit(struct nfs_page *req);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
+/*
+ * nfs_page_find_head_request_locked - find head request associated with @page
+ *
+ * must be called while holding the inode lock.
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
static struct nfs_page *
-nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
{
struct nfs_page *req = NULL;
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
/* Linearly search the commit list for the correct req */
list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
if (freq->wb_page == page) {
- req = freq;
+ req = freq->wb_head;
break;
}
}
}
- if (req)
+ if (req) {
+ WARN_ON_ONCE(req->wb_head != req);
+
kref_get(&req->wb_kref);
+ }
return req;
}
-static struct nfs_page *nfs_page_find_request(struct page *page)
+/*
+ * nfs_page_find_head_request - find head request associated with @page
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
+static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req = NULL;
spin_lock(&inode->i_lock);
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
spin_unlock(&inode->i_lock);
return req;
}
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
-static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
+
+/* nfs_page_group_clear_bits
+ * @req - an nfs request
+ * clears all page group related bits from @req
+ */
+static void
+nfs_page_group_clear_bits(struct nfs_page *req)
+{
+ clear_bit(PG_TEARDOWN, &req->wb_flags);
+ clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
+ clear_bit(PG_UPTODATE, &req->wb_flags);
+ clear_bit(PG_WB_END, &req->wb_flags);
+ clear_bit(PG_REMOVE, &req->wb_flags);
+}
+
+
+/*
+ * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
+ *
+ * this is a helper function for nfs_lock_and_join_requests
+ *
+ * @inode - inode associated with request page group, must be holding inode lock
+ * @head - head request of page group, must be holding head lock
+ * @req - request that couldn't lock and needs to wait on the req bit lock
+ * @nonblock - if true, don't actually wait
+ *
+ * NOTE: this must be called holding page_group bit lock and inode spin lock
+ * and BOTH will be released before returning.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int
+nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
+ struct nfs_page *req, bool nonblock)
+ __releases(&inode->i_lock)
+{
+ struct nfs_page *tmp;
+ int ret;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
+ nfs_unlock_request(tmp);
+
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+
+ /* grab a ref on the request that will be waited on */
+ kref_get(&req->wb_kref);
+
+ nfs_page_group_unlock(head);
+ spin_unlock(&inode->i_lock);
+
+ /* release ref from nfs_page_find_head_request_locked */
+ nfs_release_request(head);
+
+ if (!nonblock)
+ ret = nfs_wait_on_request(req);
+ else
+ ret = -EAGAIN;
+ nfs_release_request(req);
+
+ return ret;
+}
+
+/*
+ * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
+ *
+ * @destroy_list - request list (using wb_this_page) terminated by @old_head
+ * @old_head - the old head of the list
+ *
+ * All subrequests must be locked and removed from all lists, so at this point
+ * they are only "active" in this function, and possibly in nfs_wait_on_request
+ * with a reference held by some other context.
+ */
+static void
+nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+ struct nfs_page *old_head)
+{
+ while (destroy_list) {
+ struct nfs_page *subreq = destroy_list;
+
+ destroy_list = (subreq->wb_this_page == old_head) ?
+ NULL : subreq->wb_this_page;
+
+ WARN_ON_ONCE(old_head != subreq->wb_head);
+
+ /* make sure old group is not used */
+ subreq->wb_head = subreq;
+ subreq->wb_this_page = subreq;
+
+ nfs_clear_request_commit(subreq);
+
+ /* subreq is now totally disconnected from page group or any
+ * write / commit lists. last chance to wake any waiters */
+ nfs_unlock_request(subreq);
+
+ if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+ /* release ref on old head request */
+ nfs_release_request(old_head);
+
+ nfs_page_group_clear_bits(subreq);
+
+ /* release the PG_INODE_REF reference */
+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+ nfs_release_request(subreq);
+ else
+ WARN_ON_ONCE(1);
+ } else {
+ WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
+ /* zombie requests have already released the last
+ * reference and were waiting on the rest of the
+ * group to complete. Since it's no longer part of a
+ * group, simply free the request */
+ nfs_page_group_clear_bits(subreq);
+ nfs_free_request(subreq);
+ }
+ }
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req and return
+ * a locked reference, cancelling any pending
+ * operations for this page.
+ *
+ * @page - the page used to lookup the "page group" of nfs_page structures
+ * @nonblock - if true, don't block waiting for request locks
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group. All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page, bool nonblock)
{
struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req;
+ struct nfs_page *head, *subreq;
+ struct nfs_page *destroy_list = NULL;
+ unsigned int total_bytes;
int ret;
+try_again:
+ total_bytes = 0;
+
+ WARN_ON_ONCE(destroy_list);
+
spin_lock(&inode->i_lock);
- for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req))
- break;
- /* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request() will always
- * succeed provided that someone hasn't already marked the
- * request as dirty (in which case we don't care).
- */
+
+ /*
+ * A reference is taken only on the head request which acts as a
+ * reference to the whole page group - the group will not be destroyed
+ * until the head reference is released.
+ */
+ head = nfs_page_find_head_request_locked(NFS_I(inode), page);
+
+ if (!head) {
spin_unlock(&inode->i_lock);
- if (!nonblock)
- ret = nfs_wait_on_request(req);
- else
- ret = -EAGAIN;
- nfs_release_request(req);
- if (ret != 0)
+ return NULL;
+ }
+
+ /* lock each request in the page group */
+ nfs_page_group_lock(head);
+ subreq = head;
+ do {
+ /*
+ * Subrequests are always contiguous, non overlapping
+ * and in order. If not, it's a programming error.
+ */
+ WARN_ON_ONCE(subreq->wb_offset !=
+ (head->wb_offset + total_bytes));
+
+ /* keep track of how many bytes this group covers */
+ total_bytes += subreq->wb_bytes;
+
+ if (!nfs_lock_request(subreq)) {
+ /* releases page group bit lock and
+ * inode spin lock and all references */
+ ret = nfs_unroll_locks_and_wait(inode, head,
+ subreq, nonblock);
+
+ if (ret == 0)
+ goto try_again;
+
return ERR_PTR(ret);
- spin_lock(&inode->i_lock);
+ }
+
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* Now that all requests are locked, make sure they aren't on any list.
+ * Commit list removal accounting is done after locks are dropped */
+ subreq = head;
+ do {
+ nfs_list_remove_request(subreq);
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* unlink subrequests from head, destroy them later */
+ if (head->wb_this_page != head) {
+ /* destroy list will be terminated by head */
+ destroy_list = head->wb_this_page;
+ head->wb_this_page = head;
+
+ /* change head request to cover whole range that
+ * the former page group covered */
+ head->wb_bytes = total_bytes;
}
+
+ /*
+ * prepare head request to be added to new pgio descriptor
+ */
+ nfs_page_group_clear_bits(head);
+
+ /*
+ * some part of the group was still on the inode list - otherwise
+ * the group wouldn't be involved in async write.
+ * grab a reference for the head request, iff it needs one.
+ */
+ if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+ kref_get(&head->wb_kref);
+
+ nfs_page_group_unlock(head);
+
+ /* drop lock to clear_request_commit the head req and clean up
+ * requests on destroy list */
spin_unlock(&inode->i_lock);
- return req;
+
+ nfs_destroy_unlinked_subrequests(destroy_list, head);
+
+ /* clean up commit list state */
+ nfs_clear_request_commit(head);
+
+ /* still holds ref on head from nfs_page_find_head_request_locked
+ * and still has lock on head from lock loop */
+ return head;
}
/*
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req;
int ret = 0;
- req = nfs_find_and_lock_request(page, nonblock);
+ req = nfs_lock_and_join_requests(page, nonblock);
if (!req)
goto out;
ret = PTR_ERR(req);
@@ -397,7 +623,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
int err;
/* Stop dirtying of new pages while we sync */
- err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
+ err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (err)
goto out_err;
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
- set_bit(PG_INODE_REF, &req->wb_flags);
+ /* this a head request for a page group - mark it as having an
+ * extra reference so sub groups can follow suit */
+ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
nfsi->npages--;
spin_unlock(&inode->i_lock);
}
- nfs_release_request(req);
+
+ if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+ nfs_release_request(req);
}
static void
@@ -638,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
unsigned long bytes = 0;
- bool do_destroy;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -668,7 +897,6 @@ remove_req:
next:
nfs_unlock_request(req);
nfs_end_page_writeback(req);
- do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
nfs_release_request(req);
}
out:
@@ -769,7 +997,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
spin_lock(&inode->i_lock);
for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
if (req == NULL)
goto out_unlock;
@@ -877,7 +1105,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* dropped page.
*/
do {
- req = nfs_page_find_request(page);
+ req = nfs_page_find_head_request(page);
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
@@ -934,12 +1162,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
if (nfs_have_delegated_attributes(inode))
goto out;
- if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+ if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
return false;
smp_rmb();
if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
return false;
out:
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ return false;
return PageUptodate(page) != 0;
}
@@ -1473,7 +1703,7 @@ int nfs_commit_inode(struct inode *inode, int how)
return error;
if (!may_wait)
goto out_mark_dirty;
- error = wait_on_bit(&NFS_I(inode)->flags,
+ error = wait_on_bit_action(&NFS_I(inode)->flags,
NFS_INO_COMMIT,
nfs_wait_bit_killable,
TASK_KILLABLE);
@@ -1567,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
struct nfs_page *req;
int ret = 0;
- for (;;) {
- wait_on_page_writeback(page);
- req = nfs_page_find_request(page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req)) {
- nfs_clear_request_commit(req);
- nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
- nfs_unlock_and_release_request(req);
- break;
- }
- ret = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (ret < 0)
- break;
+ wait_on_page_writeback(page);
+
+ /* blocking call to cancel all requests and join to a single (head)
+ * request */
+ req = nfs_lock_and_join_requests(page, false);
+
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ } else if (req) {
+ /* all requests from this page have been cancelled by
+ * nfs_lock_and_join_requests, so just remove the head
+ * request from the inode / page_private pointer and
+ * release it */
+ nfs_inode_remove_request(req);
+ /*
+ * In case nfs_inode_remove_request has marked the
+ * page as being dirty
+ */
+ cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ nfs_unlock_and_release_request(req);
}
+
return ret;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 72a2a82e11a4..f9821ce6658a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2636,7 +2636,7 @@ nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{
__be32 *p;
- p = xdr_reserve_space(xdr, 6);
+ p = xdr_reserve_space(xdr, 20);
if (!p)
return NULL;
*p++ = htonl(2);
@@ -3268,7 +3268,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
wire_count = htonl(maxcount);
write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
- xdr_truncate_encode(xdr, length_offset + 4 + maxcount);
+ xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4));
if (maxcount & 3)
write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
&zero, 4 - (maxcount&3));
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
index 85c98737a146..fc603e0431bb 100644
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o
nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
btnode.o bmap.o btree.o direct.o dat.o recovery.o \
the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
- ifile.o alloc.o gcinode.o ioctl.o
+ ifile.o alloc.o gcinode.o ioctl.o sysfs.o
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..0696161bf59d 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -320,6 +320,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
int nilfs_init_gcinode(struct inode *inode);
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
+/* sysfs.c */
+int __init nilfs_sysfs_init(void);
+void nilfs_sysfs_exit(void);
+int nilfs_sysfs_create_device_group(struct super_block *);
+void nilfs_sysfs_delete_device_group(struct the_nilfs *);
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
+
/*
* Inodes and files operations
*/
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8c532b2ca3ab..c519927b7b5e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1452,13 +1452,19 @@ static int __init init_nilfs_fs(void)
if (err)
goto fail;
- err = register_filesystem(&nilfs_fs_type);
+ err = nilfs_sysfs_init();
if (err)
goto free_cachep;
+ err = register_filesystem(&nilfs_fs_type);
+ if (err)
+ goto deinit_sysfs_entry;
+
printk(KERN_INFO "NILFS version 2 loaded\n");
return 0;
+deinit_sysfs_entry:
+ nilfs_sysfs_exit();
free_cachep:
nilfs_destroy_cachep();
fail:
@@ -1468,6 +1474,7 @@ fail:
static void __exit exit_nilfs_fs(void)
{
nilfs_destroy_cachep();
+ nilfs_sysfs_exit();
unregister_filesystem(&nilfs_fs_type);
}
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644
index 000000000000..bbb0dcc35905
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1137 @@
+/*
+ * sysfs.c - sysfs support implementation.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#include <linux/kobject.h>
+
+#include "nilfs.h"
+#include "mdt.h"
+#include "sufile.h"
+#include "cpfile.h"
+#include "sysfs.h"
+
+/* /sys/fs/<nilfs>/ */
+static struct kset *nilfs_kset;
+
+#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
+ struct tm res; \
+ int count = 0; \
+ time_to_tm(time_t_val, 0, &res); \
+ res.tm_year += 1900; \
+ res.tm_mon += 1; \
+ count = scnprintf(buf, PAGE_SIZE, \
+ "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
+ res.tm_year, res.tm_mon, res.tm_mday, \
+ res.tm_hour, res.tm_min, res.tm_sec);\
+ count; \
+})
+
+#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
+static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
+ struct attribute *attr, char *buf) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->show ? a->show(a, nilfs, buf) : 0; \
+} \
+static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
+ struct attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->store ? a->store(a, nilfs, buf, len) : 0; \
+} \
+static const struct sysfs_ops nilfs_##name##_attr_ops = { \
+ .show = nilfs_##name##_attr_show, \
+ .store = nilfs_##name##_attr_store, \
+};
+
+#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
+static void nilfs_##name##_attr_release(struct kobject *kobj) \
+{ \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ complete(&subgroups->sg_##name##_kobj_unregister); \
+} \
+static struct kobj_type nilfs_##name##_ktype = { \
+ .default_attrs = nilfs_##name##_attrs, \
+ .sysfs_ops = &nilfs_##name##_attr_ops, \
+ .release = nilfs_##name##_attr_release, \
+};
+
+#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
+static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ struct kobject *parent; \
+ struct kobject *kobj; \
+ struct completion *kobj_unregister; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ int err; \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ kobj = &subgroups->sg_##name##_kobj; \
+ kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
+ parent = &nilfs->ns_##parent_name##_kobj; \
+ kobj->kset = nilfs_kset; \
+ init_completion(kobj_unregister); \
+ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
+ #name); \
+ if (err) \
+ return err; \
+ return 0; \
+} \
+static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+}
+
+/************************************************************************
+ * NILFS snapshot attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->inodes_count));
+}
+
+static ssize_t
+nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->blocks_count));
+}
+
+static const char snapshot_readme_str[] =
+ "The group contains details about mounted snapshot.\n\n"
+ "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
+ "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
+
+static ssize_t
+nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, snapshot_readme_str);
+}
+
+NILFS_SNAPSHOT_RO_ATTR(inodes_count);
+NILFS_SNAPSHOT_RO_ATTR(blocks_count);
+NILFS_SNAPSHOT_RO_ATTR(README);
+
+static struct attribute *nilfs_snapshot_attrs[] = {
+ NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
+ NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
+ NILFS_SNAPSHOT_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->show ? a->show(a, root, buf) : 0;
+}
+
+static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->store ? a->store(a, root, buf, len) : 0;
+}
+
+static void nilfs_snapshot_attr_release(struct kobject *kobj)
+{
+ struct nilfs_root *root = container_of(kobj, struct nilfs_root,
+ snapshot_kobj);
+ complete(&root->snapshot_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_snapshot_attr_ops = {
+ .show = nilfs_snapshot_attr_show,
+ .store = nilfs_snapshot_attr_store,
+};
+
+static struct kobj_type nilfs_snapshot_ktype = {
+ .default_attrs = nilfs_snapshot_attrs,
+ .sysfs_ops = &nilfs_snapshot_attr_ops,
+ .release = nilfs_snapshot_attr_release,
+};
+
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
+{
+ struct the_nilfs *nilfs;
+ struct kobject *parent;
+ int err;
+
+ nilfs = root->nilfs;
+ parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
+ root->snapshot_kobj.kset = nilfs_kset;
+ init_completion(&root->snapshot_kobj_unregister);
+
+ if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ &nilfs->ns_dev_kobj,
+ "current_checkpoint");
+ } else {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ parent,
+ "%llu", root->cno);
+ }
+
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
+{
+ kobject_del(&root->snapshot_kobj);
+}
+
+/************************************************************************
+ * NILFS mounted snapshots attrs *
+ ************************************************************************/
+
+static const char mounted_snapshots_readme_str[] =
+ "The mounted_snapshots group contains group for\n"
+ "every mounted snapshot.\n";
+
+static ssize_t
+nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str);
+}
+
+NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
+
+static struct attribute *nilfs_mounted_snapshots_attrs[] = {
+ NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
+
+/************************************************************************
+ * NILFS checkpoints attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 ncheckpoints;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ ncheckpoints = cpstat.cs_ncps;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints);
+}
+
+static ssize_t
+nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nsnapshots;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ nsnapshots = cpstat.cs_nsss;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots);
+}
+
+static ssize_t
+nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static const char checkpoints_readme_str[] =
+ "The checkpoints group contains attributes that describe\n"
+ "details about volume's checkpoints.\n\n"
+ "(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
+ "(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
+
+static ssize_t
+nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, checkpoints_readme_str);
+}
+
+NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
+NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
+NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(README);
+
+static struct attribute *nilfs_checkpoints_attrs[] = {
+ NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
+NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
+NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
+
+/************************************************************************
+ * NILFS segments attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments);
+}
+
+static ssize_t
+nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment);
+}
+
+static ssize_t
+nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long ncleansegs;
+
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+ ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs);
+}
+
+static ssize_t
+nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_sustat sustat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get segment stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs);
+}
+
+static const char segments_readme_str[] =
+ "The segments group contains attributes that describe\n"
+ "details about volume's segments.\n\n"
+ "(1) segments_number\n\tshow number of segments on volume.\n\n"
+ "(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
+ "(3) clean_segments\n\tshow count of clean segments.\n\n"
+ "(4) dirty_segments\n\tshow count of dirty segments.\n\n";
+
+static ssize_t
+nilfs_segments_README_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, segments_readme_str);
+}
+
+NILFS_SEGMENTS_RO_ATTR(segments_number);
+NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
+NILFS_SEGMENTS_RO_ATTR(clean_segments);
+NILFS_SEGMENTS_RO_ATTR(dirty_segments);
+NILFS_SEGMENTS_RO_ATTR(README);
+
+static struct attribute *nilfs_segments_attrs[] = {
+ NILFS_SEGMENTS_ATTR_LIST(segments_number),
+ NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
+ NILFS_SEGMENTS_ATTR_LIST(clean_segments),
+ NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
+ NILFS_SEGMENTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segments, dev);
+NILFS_DEV_INT_GROUP_TYPE(segments, dev);
+NILFS_DEV_INT_GROUP_FNS(segments, dev);
+
+/************************************************************************
+ * NILFS segctor attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t last_pseg;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_pseg = nilfs->ns_last_pseg;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)last_pseg);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 last_seq;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_seq = nilfs->ns_last_seq;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 seg_seq;
+
+ down_read(&nilfs->ns_sem);
+ seg_seq = nilfs->ns_seg_seq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq);
+}
+
+static ssize_t
+nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 segnum;
+
+ down_read(&nilfs->ns_sem);
+ segnum = nilfs->ns_segnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", segnum);
+}
+
+static ssize_t
+nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nextnum;
+
+ down_read(&nilfs->ns_sem);
+ nextnum = nilfs->ns_nextnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum);
+}
+
+static ssize_t
+nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long pseg_offset;
+
+ down_read(&nilfs->ns_sem);
+ pseg_offset = nilfs->ns_pseg_offset;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset);
+}
+
+static ssize_t
+nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(nongc_ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)nongc_ctime);
+}
+
+static ssize_t
+nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u32 ndirtyblks;
+
+ down_read(&nilfs->ns_sem);
+ ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks);
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks);
+}
+
+static const char segctor_readme_str[] =
+ "The segctor group contains attributes that describe\n"
+ "segctor thread activity details.\n\n"
+ "(1) last_pseg_block\n"
+ "\tshow start block number of the latest segment.\n\n"
+ "(2) last_seg_sequence\n"
+ "\tshow sequence value of the latest segment.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
+ "(5) current_last_full_seg\n"
+ "\tshow index number of the latest full segment.\n\n"
+ "(6) next_full_seg\n"
+ "\tshow index number of the full segment index to be used next.\n\n"
+ "(7) next_pseg_offset\n"
+ "\tshow offset of next partial segment in the current full segment.\n\n"
+ "(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
+ "(9) last_seg_write_time\n"
+ "\tshow write time of the last segment in human-readable format.\n\n"
+ "(10) last_seg_write_time_secs\n"
+ "\tshow write time of the last segment in seconds.\n\n"
+ "(11) last_nongc_write_time\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in human-readable format.\n\n"
+ "(12) last_nongc_write_time_secs\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in seconds.\n\n"
+ "(13) dirty_data_blocks_count\n"
+ "\tshow number of dirty data blocks.\n\n";
+
+static ssize_t
+nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, segctor_readme_str);
+}
+
+NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
+NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
+NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
+NILFS_SEGCTOR_RO_ATTR(README);
+
+static struct attribute *nilfs_segctor_attrs[] = {
+ NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
+ NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
+ NILFS_SEGCTOR_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segctor, dev);
+NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
+NILFS_DEV_INT_GROUP_FNS(segctor, dev);
+
+/************************************************************************
+ * NILFS superblock attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(sbwtime, buf);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sbwcount;
+
+ down_read(&nilfs->ns_sem);
+ sbwcount = nilfs->ns_sbwcount;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sb_update_freq;
+
+ down_read(&nilfs->ns_sem);
+ sb_update_freq = nilfs->ns_sb_update_freq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ const char *buf, size_t count)
+{
+ unsigned val;
+ int err;
+
+ err = kstrtouint(skip_spaces(buf), 0, &val);
+ if (err) {
+ printk(KERN_ERR "NILFS: unable to convert string: err=%d\n",
+ err);
+ return err;
+ }
+
+ if (val < NILFS_SB_FREQ) {
+ val = NILFS_SB_FREQ;
+ printk(KERN_WARNING "NILFS: superblock update frequency cannot be lesser than 10 seconds\n");
+ }
+
+ down_write(&nilfs->ns_sem);
+ nilfs->ns_sb_update_freq = val;
+ up_write(&nilfs->ns_sem);
+
+ return count;
+}
+
+static const char sb_readme_str[] =
+ "The superblock group contains attributes that describe\n"
+ "superblock's details.\n\n"
+ "(1) sb_write_time\n\tshow previous write time of super block "
+ "in human-readable format.\n\n"
+ "(2) sb_write_time_secs\n\tshow previous write time of super block "
+ "in seconds.\n\n"
+ "(3) sb_write_count\n\tshow write count of super block.\n\n"
+ "(4) sb_update_frequency\n"
+ "\tshow/set interval of periodical update of superblock (in seconds).\n\n"
+ "\tYou can set preferable frequency of superblock update by command:\n\n"
+ "\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
+
+static ssize_t
+nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, sb_readme_str);
+}
+
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
+NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
+NILFS_SUPERBLOCK_RO_ATTR(README);
+
+static struct attribute *nilfs_superblock_attrs[] = {
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
+ NILFS_SUPERBLOCK_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(superblock, dev);
+NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
+NILFS_DEV_INT_GROUP_FNS(superblock, dev);
+
+/************************************************************************
+ * NILFS device attrs *
+ ************************************************************************/
+
+static
+ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u32 major = le32_to_cpu(sbp[0]->s_rev_level);
+ u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor);
+}
+
+static
+ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize);
+}
+
+static
+ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size);
+}
+
+static
+ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t free_blocks = 0;
+
+ nilfs_count_free_blocks(nilfs, &free_blocks);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)free_blocks);
+}
+
+static
+ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid);
+}
+
+static
+ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
+ sbp[0]->s_volume_name);
+}
+
+static const char dev_readme_str[] =
+ "The <device> group contains attributes that describe file system\n"
+ "partition's details.\n\n"
+ "(1) revision\n\tshow NILFS file system revision.\n\n"
+ "(2) blocksize\n\tshow volume block size in bytes.\n\n"
+ "(3) device_size\n\tshow volume size in bytes.\n\n"
+ "(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
+ "(5) uuid\n\tshow volume's UUID.\n\n"
+ "(6) volume_name\n\tshow volume's name.\n\n";
+
+static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, dev_readme_str);
+}
+
+NILFS_DEV_RO_ATTR(revision);
+NILFS_DEV_RO_ATTR(blocksize);
+NILFS_DEV_RO_ATTR(device_size);
+NILFS_DEV_RO_ATTR(free_blocks);
+NILFS_DEV_RO_ATTR(uuid);
+NILFS_DEV_RO_ATTR(volume_name);
+NILFS_DEV_RO_ATTR(README);
+
+static struct attribute *nilfs_dev_attrs[] = {
+ NILFS_DEV_ATTR_LIST(revision),
+ NILFS_DEV_ATTR_LIST(blocksize),
+ NILFS_DEV_ATTR_LIST(device_size),
+ NILFS_DEV_ATTR_LIST(free_blocks),
+ NILFS_DEV_ATTR_LIST(uuid),
+ NILFS_DEV_ATTR_LIST(volume_name),
+ NILFS_DEV_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->show ? a->show(a, nilfs, buf) : 0;
+}
+
+static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->store ? a->store(a, nilfs, buf, len) : 0;
+}
+
+static void nilfs_dev_attr_release(struct kobject *kobj)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ complete(&nilfs->ns_dev_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_dev_attr_ops = {
+ .show = nilfs_dev_attr_show,
+ .store = nilfs_dev_attr_store,
+};
+
+static struct kobj_type nilfs_dev_ktype = {
+ .default_attrs = nilfs_dev_attrs,
+ .sysfs_ops = &nilfs_dev_attr_ops,
+ .release = nilfs_dev_attr_release,
+};
+
+int nilfs_sysfs_create_device_group(struct super_block *sb)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
+ int err;
+
+ nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
+ if (unlikely(!nilfs->ns_dev_subgroups)) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to allocate memory for device group\n");
+ goto failed_create_device_group;
+ }
+
+ nilfs->ns_dev_kobj.kset = nilfs_kset;
+ init_completion(&nilfs->ns_dev_kobj_unregister);
+ err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto free_dev_subgroups;
+
+ err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
+ if (err)
+ goto cleanup_dev_kobject;
+
+ err = nilfs_sysfs_create_checkpoints_group(nilfs);
+ if (err)
+ goto delete_mounted_snapshots_group;
+
+ err = nilfs_sysfs_create_segments_group(nilfs);
+ if (err)
+ goto delete_checkpoints_group;
+
+ err = nilfs_sysfs_create_superblock_group(nilfs);
+ if (err)
+ goto delete_segments_group;
+
+ err = nilfs_sysfs_create_segctor_group(nilfs);
+ if (err)
+ goto delete_superblock_group;
+
+ return 0;
+
+delete_superblock_group:
+ nilfs_sysfs_delete_superblock_group(nilfs);
+
+delete_segments_group:
+ nilfs_sysfs_delete_segments_group(nilfs);
+
+delete_checkpoints_group:
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+
+delete_mounted_snapshots_group:
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+
+cleanup_dev_kobject:
+ kobject_del(&nilfs->ns_dev_kobj);
+
+free_dev_subgroups:
+ kfree(nilfs->ns_dev_subgroups);
+
+failed_create_device_group:
+ return err;
+}
+
+void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
+{
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+ nilfs_sysfs_delete_segments_group(nilfs);
+ nilfs_sysfs_delete_superblock_group(nilfs);
+ nilfs_sysfs_delete_segctor_group(nilfs);
+ kobject_del(&nilfs->ns_dev_kobj);
+ kfree(nilfs->ns_dev_subgroups);
+}
+
+/************************************************************************
+ * NILFS feature attrs *
+ ************************************************************************/
+
+static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n",
+ NILFS_CURRENT_REV, NILFS_MINOR_REV);
+}
+
+static const char features_readme_str[] =
+ "The features group contains attributes that describe NILFS file\n"
+ "system driver features.\n\n"
+ "(1) revision\n\tshow current revision of NILFS file system driver.\n";
+
+static ssize_t nilfs_feature_README_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, features_readme_str);
+}
+
+NILFS_FEATURE_RO_ATTR(revision);
+NILFS_FEATURE_RO_ATTR(README);
+
+static struct attribute *nilfs_feature_attrs[] = {
+ NILFS_FEATURE_ATTR_LIST(revision),
+ NILFS_FEATURE_ATTR_LIST(README),
+ NULL,
+};
+
+static const struct attribute_group nilfs_feature_attr_group = {
+ .name = "features",
+ .attrs = nilfs_feature_attrs,
+};
+
+int __init nilfs_sysfs_init(void)
+{
+ int err;
+
+ nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
+ if (!nilfs_kset) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to create sysfs entry: err %d\n",
+ err);
+ goto failed_sysfs_init;
+ }
+
+ err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ if (unlikely(err)) {
+ printk(KERN_ERR "NILFS: unable to create feature group: err %d\n",
+ err);
+ goto cleanup_sysfs_init;
+ }
+
+ return 0;
+
+cleanup_sysfs_init:
+ kset_unregister(nilfs_kset);
+
+failed_sysfs_init:
+ return err;
+}
+
+void nilfs_sysfs_exit(void)
+{
+ sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ kset_unregister(nilfs_kset);
+}
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644
index 000000000000..677e3a1a8370
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,176 @@
+/*
+ * sysfs.h - sysfs support declarations.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#ifndef _NILFS_SYSFS_H
+#define _NILFS_SYSFS_H
+
+#include <linux/sysfs.h>
+
+#define NILFS_ROOT_GROUP_NAME "nilfs2"
+
+/*
+ * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
+ * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
+ * @sg_superblock_kobj_unregister: completion state
+ * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
+ * @sg_segctor_kobj_unregister: completion state
+ * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
+ * @sg_mounted_snapshots_kobj_unregister: completion state
+ * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
+ * @sg_checkpoints_kobj_unregister: completion state
+ * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
+ * @sg_segments_kobj_unregister: completion state
+ */
+struct nilfs_sysfs_dev_subgroups {
+ /* /sys/fs/<nilfs>/<device>/superblock */
+ struct kobject sg_superblock_kobj;
+ struct completion sg_superblock_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segctor */
+ struct kobject sg_segctor_kobj;
+ struct completion sg_segctor_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
+ struct kobject sg_mounted_snapshots_kobj;
+ struct completion sg_mounted_snapshots_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/checkpoints */
+ struct kobject sg_checkpoints_kobj;
+ struct completion sg_checkpoints_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segments */
+ struct kobject sg_segments_kobj;
+ struct completion sg_segments_kobj_unregister;
+};
+
+#define NILFS_COMMON_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct kobject *, struct attribute *, \
+ char *); \
+ ssize_t (*store)(struct kobject *, struct attribute *, \
+ const char *, size_t); \
+};
+
+NILFS_COMMON_ATTR_STRUCT(feature);
+
+#define NILFS_DEV_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ const char *, size_t); \
+};
+
+NILFS_DEV_ATTR_STRUCT(dev);
+NILFS_DEV_ATTR_STRUCT(segments);
+NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
+NILFS_DEV_ATTR_STRUCT(checkpoints);
+NILFS_DEV_ATTR_STRUCT(superblock);
+NILFS_DEV_ATTR_STRUCT(segctor);
+
+#define NILFS_CP_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ const char *, size_t); \
+};
+
+NILFS_CP_ATTR_STRUCT(snapshot);
+
+#define NILFS_ATTR(type, name, mode, show, store) \
+ static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
+ __ATTR(name, mode, show, store)
+
+#define NILFS_INFO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, NULL, NULL)
+#define NILFS_RO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
+#define NILFS_RW_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0644, \
+ nilfs_##type##_##name##_show, \
+ nilfs_##type##_##name##_store)
+
+#define NILFS_FEATURE_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(feature, name)
+#define NILFS_FEATURE_RO_ATTR(name) \
+ NILFS_RO_ATTR(feature, name)
+#define NILFS_FEATURE_RW_ATTR(name) \
+ NILFS_RW_ATTR(feature, name)
+
+#define NILFS_DEV_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(dev, name)
+#define NILFS_DEV_RO_ATTR(name) \
+ NILFS_RO_ATTR(dev, name)
+#define NILFS_DEV_RW_ATTR(name) \
+ NILFS_RW_ATTR(dev, name)
+
+#define NILFS_SEGMENTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(segments, name)
+#define NILFS_SEGMENTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(segs_info, name)
+
+#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(mounted_snapshots, name)
+
+#define NILFS_CHECKPOINTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(checkpoints, name)
+#define NILFS_CHECKPOINTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(checkpoints, name)
+
+#define NILFS_SNAPSHOT_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RO_ATTR(name) \
+ NILFS_RO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RW_ATTR(name) \
+ NILFS_RW_ATTR(snapshot, name)
+
+#define NILFS_SUPERBLOCK_RO_ATTR(name) \
+ NILFS_RO_ATTR(superblock, name)
+#define NILFS_SUPERBLOCK_RW_ATTR(name) \
+ NILFS_RW_ATTR(superblock, name)
+
+#define NILFS_SEGCTOR_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RO_ATTR(name) \
+ NILFS_RO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RW_ATTR(name) \
+ NILFS_RW_ATTR(segctor, name)
+
+#define NILFS_FEATURE_ATTR_LIST(name) \
+ (&nilfs_feature_attr_##name.attr)
+#define NILFS_DEV_ATTR_LIST(name) \
+ (&nilfs_dev_attr_##name.attr)
+#define NILFS_SEGMENTS_ATTR_LIST(name) \
+ (&nilfs_segments_attr_##name.attr)
+#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
+ (&nilfs_mounted_snapshots_attr_##name.attr)
+#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
+ (&nilfs_checkpoints_attr_##name.attr)
+#define NILFS_SNAPSHOT_ATTR_LIST(name) \
+ (&nilfs_snapshot_attr_##name.attr)
+#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
+ (&nilfs_superblock_attr_##name.attr)
+#define NILFS_SEGCTOR_ATTR_LIST(name) \
+ (&nilfs_segctor_attr_##name.attr)
+
+#endif /* _NILFS_SYSFS_H */
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8ba8229ba076..9da25fe9ea61 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -85,6 +85,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
nilfs->ns_cptree = RB_ROOT;
spin_lock_init(&nilfs->ns_cptree_lock);
init_rwsem(&nilfs->ns_segctor_sem);
+ nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
return nilfs;
}
@@ -97,6 +98,7 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
+ nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@@ -640,6 +642,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
+ err = nilfs_sysfs_create_device_group(sb);
+ if (err)
+ goto failed_sbh;
+
set_nilfs_init(nilfs);
err = 0;
out:
@@ -740,12 +746,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
{
struct rb_node **p, *parent;
struct nilfs_root *root, *new;
+ int err;
root = nilfs_lookup_root(nilfs, cno);
if (root)
return root;
- new = kmalloc(sizeof(*root), GFP_KERNEL);
+ new = kzalloc(sizeof(*root), GFP_KERNEL);
if (!new)
return NULL;
@@ -782,6 +789,12 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
spin_unlock(&nilfs->ns_cptree_lock);
+ err = nilfs_sysfs_create_snapshot_group(new);
+ if (err) {
+ kfree(new);
+ new = NULL;
+ }
+
return new;
}
@@ -790,6 +803,8 @@ void nilfs_put_root(struct nilfs_root *root)
if (atomic_dec_and_test(&root->count)) {
struct the_nilfs *nilfs = root->nilfs;
+ nilfs_sysfs_delete_snapshot_group(root);
+
spin_lock(&nilfs->ns_cptree_lock);
rb_erase(&root->rb_node, &nilfs->ns_cptree);
spin_unlock(&nilfs->ns_cptree_lock);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index de8cc53b4a5c..d01ead1bea9a 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -33,6 +33,7 @@
#include <linux/slab.h>
struct nilfs_sc_info;
+struct nilfs_sysfs_dev_subgroups;
/* the_nilfs struct */
enum {
@@ -54,6 +55,7 @@ enum {
* @ns_sbwcount: write count of super block
* @ns_sbsize: size of valid data in super block
* @ns_mount_state: file system state
+ * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next
@@ -95,6 +97,9 @@ enum {
* @ns_inode_size: size of on-disk inode
* @ns_first_ino: first not-special inode number
* @ns_crc_seed: seed value of CRC32 calculation
+ * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
+ * @ns_dev_kobj_unregister: completion state
+ * @ns_dev_subgroups: <device> subgroups pointer
*/
struct the_nilfs {
unsigned long ns_flags;
@@ -114,6 +119,7 @@ struct the_nilfs {
unsigned ns_sbwcount;
unsigned ns_sbsize;
unsigned ns_mount_state;
+ unsigned ns_sb_update_freq;
/*
* Following fields are dedicated to a writable FS-instance.
@@ -188,6 +194,11 @@ struct the_nilfs {
int ns_inode_size;
int ns_first_ino;
u32 ns_crc_seed;
+
+ /* /sys/fs/<nilfs>/<device> */
+ struct kobject ns_dev_kobj;
+ struct completion ns_dev_kobj_unregister;
+ struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
};
#define THE_NILFS_FNS(bit, name) \
@@ -232,6 +243,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
* @ifile: inode file
* @inodes_count: number of inodes
* @blocks_count: number of blocks
+ * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
+ * @snapshot_kobj_unregister: completion state for kernel object
*/
struct nilfs_root {
__u64 cno;
@@ -243,6 +256,10 @@ struct nilfs_root {
atomic64_t inodes_count;
atomic64_t blocks_count;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
+ struct kobject snapshot_kobj;
+ struct completion snapshot_kobj_unregister;
};
/* Special checkpoint number */
@@ -254,7 +271,8 @@ struct nilfs_root {
static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
{
u64 t = get_seconds();
- return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
+ return t < nilfs->ns_sbwtime ||
+ t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
}
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index ee9cb3795c2b..30d3addfad75 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group,
wait_event(group->fanotify_data.access_waitq, event->response ||
atomic_read(&group->fanotify_data.bypass_perm));
- if (!event->response) /* bypass_perm set */
+ if (!event->response) { /* bypass_perm set */
+ /*
+ * Event was canceled because group is being destroyed. Remove
+ * it from group's event list because we are responsible for
+ * freeing the permission event.
+ */
+ fsnotify_remove_event(group, &event->fae.fse);
return 0;
+ }
/* userspace responded, convert to something usable */
switch (event->response) {
@@ -210,7 +217,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
return -ENOMEM;
fsn_event = &event->fse;
- ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge);
+ ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
if (ret) {
/* Permission events shouldn't be merged */
BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3fdc8a3e1134..b13992a41bd9 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
/* held the notification_mutex the whole time, so this is the
* same event we peeked above */
- return fsnotify_remove_notify_event(group);
+ return fsnotify_remove_first_event(group);
}
static int create_fd(struct fsnotify_group *group,
@@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_perm_event_info *event, *next;
+ /*
+ * There may be still new events arriving in the notification queue
+ * but since userspace cannot use fanotify fd anymore, no event can
+ * enter or leave access_list by now.
+ */
spin_lock(&group->fanotify_data.access_lock);
atomic_inc(&group->fanotify_data.bypass_perm);
@@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
}
spin_unlock(&group->fanotify_data.access_lock);
+ /*
+ * Since bypass_perm is set, newly queued events will not wait for
+ * access response. Wake up the already sleeping ones now.
+ * synchronize_srcu() in fsnotify_destroy_group() will wait for all
+ * processes sleeping in fanotify_handle_event() waiting for access
+ * response and thus also for all permission events to be freed.
+ */
wake_up(&group->fanotify_data.access_waitq);
#endif
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
+ hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
out:
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 43ab1e1a07a2..0f88bc0b4e6c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group,
if (len)
strcpy(event->name, file_name);
- ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
+ ret = fsnotify_add_event(group, fsn_event, inotify_merge);
if (ret) {
/* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index cc423a30a0c8..daf76652fe58 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
if (fsnotify_notify_queue_is_empty(group))
return NULL;
- event = fsnotify_peek_notify_event(group);
+ event = fsnotify_peek_first_event(group);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
/* held the notification_mutex the whole time, so this is the
* same event we peeked above */
- fsnotify_remove_notify_event(group);
+ fsnotify_remove_first_event(group);
return event;
}
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 1e58402171a5..a95d8e037aeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
/* Overflow events are per-group and we don't want to free them */
if (!event || event->mask == FS_Q_OVERFLOW)
return;
-
+ /* If the event is still queued, we have a problem... */
+ WARN_ON(!list_empty(&event->list));
group->ops->free_event(event);
}
@@ -83,10 +84,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
* added to the queue, 1 if the event was merged with some other queued event,
* 2 if the queue of events has overflown.
*/
-int fsnotify_add_notify_event(struct fsnotify_group *group,
- struct fsnotify_event *event,
- int (*merge)(struct list_head *,
- struct fsnotify_event *))
+int fsnotify_add_event(struct fsnotify_group *group,
+ struct fsnotify_event *event,
+ int (*merge)(struct list_head *,
+ struct fsnotify_event *))
{
int ret = 0;
struct list_head *list = &group->notification_list;
@@ -125,10 +126,25 @@ queue:
}
/*
+ * Remove @event from group's notification queue. It is the responsibility of
+ * the caller to destroy the event.
+ */
+void fsnotify_remove_event(struct fsnotify_group *group,
+ struct fsnotify_event *event)
+{
+ mutex_lock(&group->notification_mutex);
+ if (!list_empty(&event->list)) {
+ list_del_init(&event->list);
+ group->q_len--;
+ }
+ mutex_unlock(&group->notification_mutex);
+}
+
+/*
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
*/
-struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
@@ -140,7 +156,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
struct fsnotify_event, list);
/*
* We need to init list head for the case of overflow event so that
- * check in fsnotify_add_notify_events() works
+ * check in fsnotify_add_event() works
*/
list_del_init(&event->list);
group->q_len--;
@@ -149,9 +165,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
}
/*
- * This will not remove the event, that must be done with fsnotify_remove_notify_event()
+ * This will not remove the event, that must be done with
+ * fsnotify_remove_first_event()
*/
-struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
{
BUG_ON(!mutex_is_locked(&group->notification_mutex));
@@ -169,7 +186,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
mutex_lock(&group->notification_mutex);
while (!fsnotify_notify_queue_is_empty(group)) {
- event = fsnotify_remove_notify_event(group);
+ event = fsnotify_remove_first_event(group);
fsnotify_destroy_event(group, event);
}
mutex_unlock(&group->notification_mutex);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
+ hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
out:
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5c9e2c81cb11..f5ec1ce7a532 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
* ntfs_attr_extend_initialized - extend the initialized size of an attribute
* @ni: ntfs inode of the attribute to extend
* @new_init_size: requested new initialized size in bytes
- * @cached_page: store any allocated but unused page here
- * @lru_pvec: lru-buffering pagevec of the caller
*
* Extend the initialized size of an attribute described by the ntfs inode @ni
* to @new_init_size bytes. This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
* @nr_pages: number of page cache pages to obtain
* @pages: array of pages in which to return the obtained page cache pages
* @cached_page: allocated but as yet unused page
- * @lru_pvec: lru-buffering pagevec of caller
*
* Obtain @nr_pages locked page cache pages from the mapping @mapping and
* starting at index @index.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9d8fcf2f3b94..a93bf9892256 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4961,6 +4961,15 @@ leftright:
el = path_leaf_el(path);
split_index = ocfs2_search_extent_list(el, cpos);
+ if (split_index == -1) {
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has an extent at cpos %u "
+ "which can no longer be found.\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ cpos);
+ ret = -EROFS;
+ goto out;
+ }
goto leftright;
}
out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
el = path_leaf_el(left_path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu: split at cpos %u lost record.",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index a106b3f2b22a..fae17c640df3 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -331,6 +331,7 @@ struct dlm_lock_resource
u16 state;
char lvb[DLM_LVB_LEN];
unsigned int inflight_locks;
+ unsigned int inflight_assert_workers;
unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
};
@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res);
+
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 39efc5057a36..3fcf205ee900 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
goto bail;
}
- if (total_backoff >
- msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+ if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
status = -ERESTARTSYS;
mlog(ML_NOTICE, "Timed out joining dlm domain "
"%s after %u msecs\n", dlm->name,
- jiffies_to_msecs(total_backoff));
+ total_backoff);
goto bail;
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3087a21d32f9..3ec906ef5d9a 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
atomic_set(&res->asts_reserved, 0);
res->migration_pending = 0;
res->inflight_locks = 0;
+ res->inflight_assert_workers = 0;
res->dlm = dlm;
@@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
wake_up(&res->wq);
}
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ assert_spin_locked(&res->spinlock);
+ res->inflight_assert_workers++;
+ mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ res->inflight_assert_workers);
+}
+
+static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ spin_lock(&res->spinlock);
+ __dlm_lockres_grab_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
+}
+
+static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ assert_spin_locked(&res->spinlock);
+ BUG_ON(res->inflight_assert_workers == 0);
+ res->inflight_assert_workers--;
+ mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ res->inflight_assert_workers);
+}
+
+static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ spin_lock(&res->spinlock);
+ __dlm_lockres_drop_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
+}
+
/*
* lookup a lock resource by name.
* may already exist in the hashtable.
@@ -1603,7 +1641,8 @@ send_response:
mlog(ML_ERROR, "failed to dispatch assert master work\n");
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
- }
+ } else
+ dlm_lockres_grab_inflight_worker(dlm, res);
} else {
if (res)
dlm_lockres_put(res);
@@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
dlm_lockres_release_ast(dlm, res);
put:
+ dlm_lockres_drop_inflight_worker(dlm, res);
+
dlm_lockres_put(res);
mlog(0, "finished with dlm_assert_master_worker\n");
@@ -2364,6 +2405,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
if (res->state & DLM_LOCK_RES_MIGRATING)
return 0;
+ /* delay migration when the lockres is in RECOCERING state */
+ if (res->state & DLM_LOCK_RES_RECOVERING)
+ return 0;
+
if (res->owner != dlm->node_num)
return 0;
@@ -3088,11 +3133,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
/* remove it so that only one mle will be found */
__dlm_unlink_mle(dlm, tmp);
__dlm_mle_detach_hb_events(dlm, tmp);
- ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
- mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
- "telling master to get ref for cleared out mle "
- "during migration\n", dlm->name, namelen, name,
- master, new_master);
+ if (tmp->type == DLM_MLE_MASTER) {
+ ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
+ mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
+ "telling master to get ref "
+ "for cleared out mle during "
+ "migration\n", dlm->name,
+ namelen, name, master,
+ new_master);
+ }
}
spin_unlock(&tmp->spinlock);
}
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5de019437ea5..45067faf5695 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
mlog_errno(-ENOMEM);
/* retry!? */
BUG();
- }
+ } else
+ __dlm_lockres_grab_inflight_worker(dlm, res);
} else /* put.. incase we are not the master */
dlm_lockres_put(res);
spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 9db869de829d..69aac6f088ad 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
* refs on it. */
unused = __dlm_lockres_unused(lockres);
if (!unused ||
- (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+ (lockres->state & DLM_LOCK_RES_MIGRATING) ||
+ (lockres->inflight_assert_workers != 0)) {
mlog(0, "%s: res %.*s is in use or being remastered, "
- "used %d, state %d\n", dlm->name,
- lockres->lockname.len, lockres->lockname.name,
- !unused, lockres->state);
- list_move_tail(&dlm->purge_list, &lockres->purge);
+ "used %d, state %d, assert master workers %u\n",
+ dlm->name, lockres->lockname.len,
+ lockres->lockname.name,
+ !unused, lockres->state,
+ lockres->inflight_assert_workers);
+ list_move_tail(&lockres->purge, &dlm->purge_list);
spin_unlock(&lockres->spinlock);
continue;
}
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 5698b52cf5c9..2e3c9dbab68c 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
} else if (status == DLM_RECOVERING ||
status == DLM_MIGRATING ||
- status == DLM_FORWARD) {
+ status == DLM_FORWARD ||
+ status == DLM_NOLOCKMGR
+ ) {
/* must clear the actions because this unlock
* is about to be retried. cannot free or do
* any list manipulation. */
@@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
res->lockname.name,
status==DLM_RECOVERING?"recovering":
(status==DLM_MIGRATING?"migrating":
- "forward"));
+ (status == DLM_FORWARD ? "forward" :
+ "nolockmanager")));
actions = 0;
}
if (flags & LKM_CANCEL)
@@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
* updated state to the recovery master. this thread
* just needs to finish out the operation and call
* the unlockast. */
- ret = DLM_NORMAL;
+ if (dlm_is_node_dead(dlm, owner))
+ ret = DLM_NORMAL;
+ else
+ ret = DLM_NOLOCKMGR;
} else {
/* something bad. this will BUG in ocfs2 */
ret = dlm_err_to_dlm_status(tmpret);
@@ -638,7 +644,9 @@ retry:
if (status == DLM_RECOVERING ||
status == DLM_MIGRATING ||
- status == DLM_FORWARD) {
+ status == DLM_FORWARD ||
+ status == DLM_NOLOCKMGR) {
+
/* We want to go away for a tiny bit to allow recovery
* / migration to complete on this resource. I don't
* know of any wait queue we could sleep on as this
@@ -650,7 +658,7 @@ retry:
msleep(50);
mlog(0, "retrying unlock due to pending recovery/"
- "migration/in-progress\n");
+ "migration/in-progress/reconnect\n");
goto retry;
}
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 599eb4c4c8be..6219aaadeb08 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(inode->i_sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2060fc398445..8add6f1030d7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
return inode;
}
+static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
+ struct dentry *dentry, struct inode *inode)
+{
+ struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+ ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ BUG_ON(dl->dl_count != 1);
+ spin_lock(&dentry_attach_lock);
+ dentry->d_fsdata = NULL;
+ spin_unlock(&dentry_attach_lock);
+ kfree(dl);
+ iput(inode);
+}
+
static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry,
umode_t mode,
@@ -231,6 +246,7 @@ static int ocfs2_mknod(struct inode *dir,
sigset_t oldset;
int did_block_signals = 0;
struct posix_acl *default_acl = NULL, *acl = NULL;
+ struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -423,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ dl = dentry->d_fsdata;
+
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
&lookup);
@@ -469,6 +487,9 @@ leave:
* ocfs2_delete_inode will mutex_lock again.
*/
if ((status < 0) && inode) {
+ if (dl)
+ ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
+
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
@@ -991,6 +1012,65 @@ leave:
return status;
}
+static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
+ u64 src_inode_no, u64 dest_inode_no)
+{
+ int ret = 0, i = 0;
+ u64 parent_inode_no = 0;
+ u64 child_inode_no = src_inode_no;
+ struct inode *child_inode;
+
+#define MAX_LOOKUP_TIMES 32
+ while (1) {
+ child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
+ if (IS_ERR(child_inode)) {
+ ret = PTR_ERR(child_inode);
+ break;
+ }
+
+ ret = ocfs2_inode_lock(child_inode, NULL, 0);
+ if (ret < 0) {
+ iput(child_inode);
+ if (ret != -ENOENT)
+ mlog_errno(ret);
+ break;
+ }
+
+ ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
+ &parent_inode_no);
+ ocfs2_inode_unlock(child_inode, 0);
+ iput(child_inode);
+ if (ret < 0) {
+ ret = -ENOENT;
+ break;
+ }
+
+ if (parent_inode_no == dest_inode_no) {
+ ret = 1;
+ break;
+ }
+
+ if (parent_inode_no == osb->root_inode->i_ino) {
+ ret = 0;
+ break;
+ }
+
+ child_inode_no = parent_inode_no;
+
+ if (++i >= MAX_LOOKUP_TIMES) {
+ mlog(ML_NOTICE, "max lookup times reached, filesystem "
+ "may have nested directories, "
+ "src inode: %llu, dest inode: %llu.\n",
+ (unsigned long long)src_inode_no,
+ (unsigned long long)dest_inode_no);
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* The only place this should be used is rename!
* if they have the same id, then the 1st one is the only one locked.
@@ -1002,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
struct inode *inode2)
{
int status;
+ int inode1_is_ancestor, inode2_is_ancestor;
struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
struct buffer_head **tmpbh;
@@ -1015,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
if (*bh2)
*bh2 = NULL;
- /* we always want to lock the one with the lower lockid first. */
+ /* we always want to lock the one with the lower lockid first.
+ * and if they are nested, we lock ancestor first */
if (oi1->ip_blkno != oi2->ip_blkno) {
- if (oi1->ip_blkno < oi2->ip_blkno) {
+ inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
+ oi1->ip_blkno);
+ if (inode1_is_ancestor < 0) {
+ status = inode1_is_ancestor;
+ goto bail;
+ }
+
+ inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
+ oi2->ip_blkno);
+ if (inode2_is_ancestor < 0) {
+ status = inode2_is_ancestor;
+ goto bail;
+ }
+
+ if ((inode1_is_ancestor == 1) ||
+ (oi1->ip_blkno < oi2->ip_blkno &&
+ inode2_is_ancestor == 0)) {
/* switch id1 and id2 around */
tmpbh = bh2;
bh2 = bh1;
@@ -1098,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir,
struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
struct ocfs2_dir_lookup_result target_insert = { NULL, };
+ bool should_add_orphan = false;
/* At some point it might be nice to break this function up a
* bit. */
@@ -1134,6 +1233,21 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
rename_lock = 1;
+
+ /* here we cannot guarantee the inodes haven't just been
+ * changed, so check if they are nested again */
+ status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
+ old_inode->i_ino);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ } else if (status == 1) {
+ status = -EPERM;
+ trace_ocfs2_rename_not_permitted(
+ (unsigned long long)old_inode->i_ino,
+ (unsigned long long)new_dir->i_ino);
+ goto bail;
+ }
}
/* if old and new are the same, this'll just do one lock. */
@@ -1304,6 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
mlog_errno(status);
goto bail;
}
+ should_add_orphan = true;
}
} else {
BUG_ON(new_dentry->d_parent->d_inode != new_dir);
@@ -1348,17 +1463,6 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
- if (S_ISDIR(new_inode->i_mode) ||
- (ocfs2_read_links_count(newfe) == 1)) {
- status = ocfs2_orphan_add(osb, handle, new_inode,
- newfe_bh, orphan_name,
- &orphan_insert, orphan_dir);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- }
-
/* change the dirent to point to the correct inode */
status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
old_inode);
@@ -1373,6 +1477,15 @@ static int ocfs2_rename(struct inode *old_dir,
else
ocfs2_add_links_count(newfe, -1);
ocfs2_journal_dirty(handle, newfe_bh);
+ if (should_add_orphan) {
+ status = ocfs2_orphan_add(osb, handle, new_inode,
+ newfe_bh, orphan_name,
+ &orphan_insert, orphan_dir);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
} else {
/* if the name was not found in new_dir, add it now */
status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1642,6 +1755,7 @@ static int ocfs2_symlink(struct inode *dir,
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
+ struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_symlink_begin(dir, dentry, symname,
dentry->d_name.len, dentry->d_name.name);
@@ -1830,6 +1944,8 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
+ dl = dentry->d_fsdata;
+
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
&lookup);
@@ -1864,6 +1980,9 @@ bail:
if (xattr_ac)
ocfs2_free_alloc_context(xattr_ac);
if ((status < 0) && inode) {
+ if (dl)
+ ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
+
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 1b60c62aa9d6..6cb019b7c6a8 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2292,6 +2292,8 @@ TRACE_EVENT(ocfs2_rename,
__entry->new_len, __get_str(new_name))
);
+DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted);
+
TRACE_EVENT(ocfs2_rename_target_exists,
TP_PROTO(int new_len, const char *new_name),
TP_ARGS(new_len, new_name),
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 714e53b9cc66..d81f6e2a97f5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -4288,9 +4288,16 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
goto out;
}
+ error = ocfs2_rw_lock(inode, 1);
+ if (error) {
+ mlog_errno(error);
+ goto out;
+ }
+
error = ocfs2_inode_lock(inode, &old_bh, 1);
if (error) {
mlog_errno(error);
+ ocfs2_rw_unlock(inode, 1);
goto out;
}
@@ -4302,6 +4309,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
up_write(&OCFS2_I(inode)->ip_xattr_sem);
ocfs2_inode_unlock(inode, 1);
+ ocfs2_rw_unlock(inode, 1);
brelse(old_bh);
if (error) {
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 1424c151cccc..a88b2a4fcc85 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
- si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
+ si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!si->si_bh) {
status = -ENOMEM;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c7a89cea5c5d..ddb662b32447 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1925,15 +1925,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
ocfs2_shutdown_local_alloc(osb);
+ ocfs2_truncate_log_shutdown(osb);
+
/* This will disable recovery and flush any recovery work. */
ocfs2_recovery_exit(osb);
- /*
- * During dismount, when it recovers another node it will call
- * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
- */
- ocfs2_truncate_log_shutdown(osb);
-
ocfs2_journal_shutdown(osb);
ocfs2_sync_blockdev(sb);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index ec58c7659183..ba8819702c56 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -321,7 +321,7 @@ static int omfs_get_imap(struct super_block *sb)
goto out;
sbi->s_imap_size = array_size;
- sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+ sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
if (!sbi->s_imap)
goto nomem;
diff --git a/fs/open.c b/fs/open.c
index 36662d036237..d6fd3acde134 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -263,11 +263,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EPERM;
/*
- * We can not allow to do any fallocate operation on an active
- * swapfile
+ * We cannot allow any fallocate operation on an active swapfile
*/
if (IS_SWAPFILE(inode))
- ret = -ETXTBSY;
+ return -ETXTBSY;
/*
* Revalidate the write permissions, in case security policy has
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 64db2bceac59..cd3653e4f35c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -297,15 +297,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
seq_puts(m, header);
CAP_FOR_EACH_U32(__capi) {
seq_printf(m, "%08x",
- a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
+ a->cap[CAP_LAST_U32 - __capi]);
}
seq_putc(m, '\n');
}
-/* Remove non-existent capabilities */
-#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
- CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
-
static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
const struct cred *cred;
@@ -319,11 +315,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
cap_bset = cred->cap_bset;
rcu_read_unlock();
- NORM_CAPS(cap_inheritable);
- NORM_CAPS(cap_permitted);
- NORM_CAPS(cap_effective);
- NORM_CAPS(cap_bset);
-
render_cap_t(m, "CapInh:\t", &cap_inheritable);
render_cap_t(m, "CapPrm:\t", &cap_permitted);
render_cap_t(m, "CapEff:\t", &cap_effective);
@@ -473,13 +464,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
priority = task_prio(task);
nice = task_nice(task);
- /* Temporary variable needed for gcc-2.96 */
- /* convert timespec -> nsec*/
- start_time =
- (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
- + task->real_start_time.tv_nsec;
/* convert nsec -> ticks */
- start_time = nsec_to_clock_t(start_time);
+ start_time = nsec_to_clock_t(task->real_start_time);
seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
seq_put_decimal_ll(m, ' ', ppid);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2d696b0c93bf..043c83cb51f9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -105,7 +105,7 @@
*/
struct pid_entry {
- char *name;
+ const char *name;
int len;
umode_t mode;
const struct inode_operations *iop;
@@ -130,10 +130,6 @@ struct pid_entry {
{ .proc_get_link = get_link } )
#define REG(NAME, MODE, fops) \
NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
-#define INF(NAME, MODE, read) \
- NOD(NAME, (S_IFREG|(MODE)), \
- NULL, &proc_info_file_operations, \
- { .proc_read = read } )
#define ONE(NAME, MODE, show) \
NOD(NAME, (S_IFREG|(MODE)), \
NULL, &proc_single_file_operations, \
@@ -200,27 +196,32 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-static int proc_pid_cmdline(struct task_struct *task, char *buffer)
+static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return get_cmdline(task, buffer, PAGE_SIZE);
+ /*
+ * Rely on struct seq_operations::show() being called once
+ * per internal buffer allocation. See single_open(), traverse().
+ */
+ BUG_ON(m->size < PAGE_SIZE);
+ m->count += get_cmdline(task, m->buf, PAGE_SIZE);
+ return 0;
}
-static int proc_pid_auxv(struct task_struct *task, char *buffer)
+static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
- int res = PTR_ERR(mm);
if (mm && !IS_ERR(mm)) {
unsigned int nwords = 0;
do {
nwords += 2;
} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
- res = nwords * sizeof(mm->saved_auxv[0]);
- if (res > PAGE_SIZE)
- res = PAGE_SIZE;
- memcpy(buffer, mm->saved_auxv, res);
+ seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
mmput(mm);
- }
- return res;
+ return 0;
+ } else
+ return PTR_ERR(mm);
}
@@ -229,7 +230,8 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
* Provides a wchan file via kallsyms in a proper one-value-per-file format.
* Returns the resolved symbol. If that fails, simply return the address.
*/
-static int proc_pid_wchan(struct task_struct *task, char *buffer)
+static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
unsigned long wchan;
char symname[KSYM_NAME_LEN];
@@ -240,9 +242,9 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
if (!ptrace_may_access(task, PTRACE_MODE_READ))
return 0;
else
- return sprintf(buffer, "%lu", wchan);
+ return seq_printf(m, "%lu", wchan);
else
- return sprintf(buffer, "%s", symname);
+ return seq_printf(m, "%s", symname);
}
#endif /* CONFIG_KALLSYMS */
@@ -304,9 +306,10 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
/*
* Provides /proc/PID/schedstat
*/
-static int proc_pid_schedstat(struct task_struct *task, char *buffer)
+static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return sprintf(buffer, "%llu %llu %lu\n",
+ return seq_printf(m, "%llu %llu %lu\n",
(unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount);
@@ -404,7 +407,8 @@ static const struct file_operations proc_cpuset_operations = {
};
#endif
-static int proc_oom_score(struct task_struct *task, char *buffer)
+static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
unsigned long totalpages = totalram_pages + total_swap_pages;
unsigned long points = 0;
@@ -414,12 +418,12 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
points = oom_badness(task, NULL, NULL, totalpages) *
1000 / totalpages;
read_unlock(&tasklist_lock);
- return sprintf(buffer, "%lu\n", points);
+ return seq_printf(m, "%lu\n", points);
}
struct limit_names {
- char *name;
- char *unit;
+ const char *name;
+ const char *unit;
};
static const struct limit_names lnames[RLIM_NLIMITS] = {
@@ -442,12 +446,11 @@ static const struct limit_names lnames[RLIM_NLIMITS] = {
};
/* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
unsigned int i;
- int count = 0;
unsigned long flags;
- char *bufptr = buffer;
struct rlimit rlim[RLIM_NLIMITS];
@@ -459,35 +462,34 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
/*
* print the file header
*/
- count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+ seq_printf(m, "%-25s %-20s %-20s %-10s\n",
"Limit", "Soft Limit", "Hard Limit", "Units");
for (i = 0; i < RLIM_NLIMITS; i++) {
if (rlim[i].rlim_cur == RLIM_INFINITY)
- count += sprintf(&bufptr[count], "%-25s %-20s ",
+ seq_printf(m, "%-25s %-20s ",
lnames[i].name, "unlimited");
else
- count += sprintf(&bufptr[count], "%-25s %-20lu ",
+ seq_printf(m, "%-25s %-20lu ",
lnames[i].name, rlim[i].rlim_cur);
if (rlim[i].rlim_max == RLIM_INFINITY)
- count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+ seq_printf(m, "%-20s ", "unlimited");
else
- count += sprintf(&bufptr[count], "%-20lu ",
- rlim[i].rlim_max);
+ seq_printf(m, "%-20lu ", rlim[i].rlim_max);
if (lnames[i].unit)
- count += sprintf(&bufptr[count], "%-10s\n",
- lnames[i].unit);
+ seq_printf(m, "%-10s\n", lnames[i].unit);
else
- count += sprintf(&bufptr[count], "\n");
+ seq_putc(m, '\n');
}
- return count;
+ return 0;
}
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-static int proc_pid_syscall(struct task_struct *task, char *buffer)
+static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
long nr;
unsigned long args[6], sp, pc;
@@ -496,11 +498,11 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
return res;
if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
- res = sprintf(buffer, "running\n");
+ seq_puts(m, "running\n");
else if (nr < 0)
- res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+ seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
else
- res = sprintf(buffer,
+ seq_printf(m,
"%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
nr,
args[0], args[1], args[2], args[3], args[4], args[5],
@@ -598,43 +600,6 @@ static const struct inode_operations proc_def_inode_operations = {
.setattr = proc_setattr,
};
-#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
-
-static ssize_t proc_info_read(struct file * file, char __user * buf,
- size_t count, loff_t *ppos)
-{
- struct inode * inode = file_inode(file);
- unsigned long page;
- ssize_t length;
- struct task_struct *task = get_proc_task(inode);
-
- length = -ESRCH;
- if (!task)
- goto out_no_task;
-
- if (count > PROC_BLOCK_SIZE)
- count = PROC_BLOCK_SIZE;
-
- length = -ENOMEM;
- if (!(page = __get_free_page(GFP_TEMPORARY)))
- goto out;
-
- length = PROC_I(inode)->op.proc_read(task, (char*)page);
-
- if (length >= 0)
- length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
- free_page(page);
-out:
- put_task_struct(task);
-out_no_task:
- return length;
-}
-
-static const struct file_operations proc_info_file_operations = {
- .read = proc_info_read,
- .llseek = generic_file_llseek,
-};
-
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
@@ -2056,7 +2021,7 @@ static int show_timer(struct seq_file *m, void *v)
struct k_itimer *timer;
struct timers_private *tp = m->private;
int notify;
- static char *nstr[] = {
+ static const char * const nstr[] = {
[SIGEV_SIGNAL] = "signal",
[SIGEV_NONE] = "none",
[SIGEV_THREAD] = "thread",
@@ -2392,7 +2357,7 @@ static const struct file_operations proc_coredump_filter_operations = {
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
-static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
{
struct task_io_accounting acct = task->ioac;
unsigned long flags;
@@ -2416,7 +2381,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
unlock_task_sighand(task, &flags);
}
- result = sprintf(buffer,
+ result = seq_printf(m,
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
@@ -2436,20 +2401,22 @@ out_unlock:
return result;
}
-static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return do_io_accounting(task, buffer, 0);
+ return do_io_accounting(task, m, 0);
}
-static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return do_io_accounting(task, buffer, 1);
+ return do_io_accounting(task, m, 1);
}
#endif /* CONFIG_TASK_IO_ACCOUNTING */
#ifdef CONFIG_USER_NS
static int proc_id_map_open(struct inode *inode, struct file *file,
- struct seq_operations *seq_ops)
+ const struct seq_operations *seq_ops)
{
struct user_namespace *ns = NULL;
struct task_struct *task;
@@ -2557,10 +2524,10 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
- INF("auxv", S_IRUSR, proc_pid_auxv),
+ ONE("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
- INF("limits", S_IRUGO, proc_pid_limits),
+ ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
@@ -2569,9 +2536,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUSR, proc_pid_syscall),
+ ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
- INF("cmdline", S_IRUGO, proc_pid_cmdline),
+ ONE("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tgid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_pid_maps_operations),
@@ -2594,13 +2561,13 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
- INF("wchan", S_IRUGO, proc_pid_wchan),
+ ONE("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
- INF("schedstat", S_IRUGO, proc_pid_schedstat),
+ ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2611,7 +2578,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_CGROUPS
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
- INF("oom_score", S_IRUGO, proc_oom_score),
+ ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
@@ -2625,10 +2592,10 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUSR, proc_tgid_io_accounting),
+ ONE("io", S_IRUSR, proc_tgid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
- INF("hardwall", S_IRUGO, proc_pid_hardwall),
+ ONE("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -2780,12 +2747,12 @@ out:
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- int result = 0;
+ int result = -ENOENT;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
- tgid = name_to_int(dentry);
+ tgid = name_to_int(&dentry->d_name);
if (tgid == ~0U)
goto out;
@@ -2896,18 +2863,18 @@ static const struct pid_entry tid_base_stuff[] = {
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
REG("environ", S_IRUSR, proc_environ_operations),
- INF("auxv", S_IRUSR, proc_pid_auxv),
+ ONE("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
- INF("limits", S_IRUGO, proc_pid_limits),
+ ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUSR, proc_pid_syscall),
+ ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
- INF("cmdline", S_IRUGO, proc_pid_cmdline),
+ ONE("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_tid_maps_operations),
@@ -2932,13 +2899,13 @@ static const struct pid_entry tid_base_stuff[] = {
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
- INF("wchan", S_IRUGO, proc_pid_wchan),
+ ONE("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
- INF("schedstat", S_IRUGO, proc_pid_schedstat),
+ ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2949,7 +2916,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_CGROUPS
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
- INF("oom_score", S_IRUGO, proc_oom_score),
+ ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
@@ -2960,10 +2927,10 @@ static const struct pid_entry tid_base_stuff[] = {
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUSR, proc_tid_io_accounting),
+ ONE("io", S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
- INF("hardwall", S_IRUGO, proc_pid_hardwall),
+ ONE("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -3033,7 +3000,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (!leader)
goto out_no_task;
- tid = name_to_int(dentry);
+ tid = name_to_int(&dentry->d_name);
if (tid == ~0U)
goto out;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 0788d093f5d8..955bb55fab8c 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -206,7 +206,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
{
struct task_struct *task = get_proc_task(dir);
int result = -ENOENT;
- unsigned fd = name_to_int(dentry);
+ unsigned fd = name_to_int(&dentry->d_name);
if (!task)
goto out_no_task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b7f268eb5f45..317b72641ebf 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -27,7 +27,7 @@
#include "internal.h"
-DEFINE_SPINLOCK(proc_subdir_lock);
+static DEFINE_SPINLOCK(proc_subdir_lock);
static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
{
@@ -330,28 +330,28 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
nlink_t nlink)
{
struct proc_dir_entry *ent = NULL;
- const char *fn = name;
- unsigned int len;
-
- /* make sure name is valid */
- if (!name || !strlen(name))
- goto out;
+ const char *fn;
+ struct qstr qstr;
if (xlate_proc_name(name, parent, &fn) != 0)
goto out;
+ qstr.name = fn;
+ qstr.len = strlen(fn);
+ if (qstr.len == 0 || qstr.len >= 256) {
+ WARN(1, "name len %u\n", qstr.len);
+ return NULL;
+ }
+ if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
+ WARN(1, "create '/proc/%s' by hand\n", qstr.name);
+ return NULL;
+ }
- /* At this point there must not be any '/' characters beyond *fn */
- if (strchr(fn, '/'))
- goto out;
-
- len = strlen(fn);
-
- ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
+ ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
if (!ent)
goto out;
- memcpy(ent->name, fn, len + 1);
- ent->namelen = len;
+ memcpy(ent->name, fn, qstr.len + 1);
+ ent->namelen = qstr.len;
ent->mode = mode;
ent->nlink = nlink;
atomic_set(&ent->count, 1);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3ab6d14e71c5..a024cf7b260f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,7 +52,6 @@ struct proc_dir_entry {
union proc_op {
int (*proc_get_link)(struct dentry *, struct path *);
- int (*proc_read)(struct task_struct *task, char *page);
int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid,
struct task_struct *task);
@@ -112,10 +111,10 @@ static inline int task_dumpable(struct task_struct *task)
return 0;
}
-static inline unsigned name_to_int(struct dentry *dentry)
+static inline unsigned name_to_int(const struct qstr *qstr)
{
- const char *name = dentry->d_name.name;
- int len = dentry->d_name.len;
+ const char *name = qstr->name;
+ int len = qstr->len;
unsigned n = 0;
if (len > 1 && *name == '0')
@@ -178,8 +177,6 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
/*
* generic.c
*/
-extern spinlock_t proc_subdir_lock;
-
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
struct dentry *);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 39e6ef32f0bd..6df8d0722c97 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -172,7 +172,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
- end = ALIGN(end, PAGE_SIZE);
+ end = PAGE_ALIGN(end);
/* overlap check (because we have to align page */
list_for_each_entry(tmp, head, list) {
if (tmp->type != KCORE_VMEMMAP)
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7445af0b1aa3..aa1eee06420f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(global_page_state(NR_WRITEBACK)),
K(global_page_state(NR_ANON_PAGES)),
K(global_page_state(NR_FILE_MAPPED)),
- K(global_page_state(NR_SHMEM)),
+ K(i.sharedram),
K(global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE)),
K(global_page_state(NR_SLAB_RECLAIMABLE)),
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 71290463a1d3..f92d5dd578a4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -632,7 +632,7 @@ out:
return ret;
}
-static int scan(struct ctl_table_header *head, ctl_table *table,
+static int scan(struct ctl_table_header *head, struct ctl_table *table,
unsigned long *pos, struct file *file,
struct dir_context *ctx)
{
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index cb761f010300..15f327bed8c6 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -18,7 +18,7 @@
/*
* The /proc/tty directory inodes...
*/
-static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver;
+static struct proc_dir_entry *proc_tty_driver;
/*
* This is the handler for /proc/tty/drivers
@@ -176,7 +176,7 @@ void __init proc_tty_init(void)
{
if (!proc_mkdir("tty", NULL))
return;
- proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
+ proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */
/*
* /proc/tty/driver/serial reveals the exact character counts for
* serial links which is just too easy to abuse for inferring
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5dbadecb234d..574bafc41f0b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -199,10 +199,10 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
{
- if (!proc_lookup(dir, dentry, flags))
+ if (!proc_pid_lookup(dir, dentry, flags))
return NULL;
- return proc_pid_lookup(dir, dentry, flags);
+ return proc_lookup(dir, dentry, flags);
}
static int proc_root_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9d231e9e5f0e..bf2d03f8fd3e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file)
{
- size_t size = 1024 + 128 * num_possible_cpus();
- char *buf;
- struct seq_file *m;
- int res;
+ size_t size = 1024 + 128 * num_online_cpus();
/* minimum size to display an interrupt count : 2 bytes */
size += 2 * nr_irqs;
-
- /* don't ask for more than the kmalloc() max size */
- if (size > KMALLOC_MAX_SIZE)
- size = KMALLOC_MAX_SIZE;
- buf = kmalloc(size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- res = single_open(file, show_stat, NULL);
- if (!res) {
- m = file->private_data;
- m->buf = buf;
- m->size = ksize(buf);
- } else
- kfree(buf);
- return res;
+ return single_open_size(file, show_stat, NULL, size);
}
static const struct file_operations proc_stat_operations = {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cfa63ee92c96..dfc791c42d64 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -925,15 +925,30 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
- unsigned long addr;
+ unsigned long addr = start;
int err = 0;
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- err = add_to_pagemap(addr, &pme, pm);
- if (err)
- break;
+ while (addr < end) {
+ struct vm_area_struct *vma = find_vma(walk->mm, addr);
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+ unsigned long vm_end;
+
+ if (!vma) {
+ vm_end = end;
+ } else {
+ vm_end = min(end, vma->vm_end);
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
+ }
+
+ for (; addr < vm_end; addr += PAGE_SIZE) {
+ err = add_to_pagemap(addr, &pme, pm);
+ if (err)
+ goto out;
+ }
}
+
+out:
return err;
}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 382aa890e228..a90d6d354199 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -328,6 +328,82 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
* virtually contiguous user-space in ELF layout.
*/
#ifdef CONFIG_MMU
+/*
+ * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
+ * reported as not being ram with the zero page.
+ *
+ * @vma: vm_area_struct describing requested mapping
+ * @from: start remapping from
+ * @pfn: page frame number to start remapping to
+ * @size: remapping size
+ * @prot: protection bits
+ *
+ * Returns zero on success, -EAGAIN on failure.
+ */
+static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long map_size;
+ unsigned long pos_start, pos_end, pos;
+ unsigned long zeropage_pfn = my_zero_pfn(0);
+ size_t len = 0;
+
+ pos_start = pfn;
+ pos_end = pfn + (size >> PAGE_SHIFT);
+
+ for (pos = pos_start; pos < pos_end; ++pos) {
+ if (!pfn_is_ram(pos)) {
+ /*
+ * We hit a page which is not ram. Remap the continuous
+ * region between pos_start and pos-1 and replace
+ * the non-ram page at pos with the zero page.
+ */
+ if (pos > pos_start) {
+ /* Remap continuous region */
+ map_size = (pos - pos_start) << PAGE_SHIFT;
+ if (remap_oldmem_pfn_range(vma, from + len,
+ pos_start, map_size,
+ prot))
+ goto fail;
+ len += map_size;
+ }
+ /* Remap the zero page */
+ if (remap_oldmem_pfn_range(vma, from + len,
+ zeropage_pfn,
+ PAGE_SIZE, prot))
+ goto fail;
+ len += PAGE_SIZE;
+ pos_start = pos + 1;
+ }
+ }
+ if (pos > pos_start) {
+ /* Remap the rest */
+ map_size = (pos - pos_start) << PAGE_SHIFT;
+ if (remap_oldmem_pfn_range(vma, from + len, pos_start,
+ map_size, prot))
+ goto fail;
+ }
+ return 0;
+fail:
+ do_munmap(vma->vm_mm, from, len);
+ return -EAGAIN;
+}
+
+static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ /*
+ * Check if oldmem_pfn_is_ram was registered to avoid
+ * looping over all pages without a reason.
+ */
+ if (oldmem_pfn_is_ram)
+ return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
+ else
+ return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+}
+
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
{
size_t size = vma->vm_end - vma->vm_start;
@@ -387,9 +463,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
tsz = min_t(size_t, m->offset + m->size - start, size);
paddr = m->paddr + start - m->offset;
- if (remap_oldmem_pfn_range(vma, vma->vm_start + len,
- paddr >> PAGE_SHIFT, tsz,
- vma->vm_page_prot))
+ if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
+ paddr >> PAGE_SHIFT, tsz,
+ vma->vm_page_prot))
goto fail;
size -= tsz;
start += tsz;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 34a1e5aa848c..9d7b9a83699e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -394,7 +394,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
prot = pgprot_noncached(PAGE_KERNEL);
- pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
pr_err("%s: Failed to allocate array for %u pages\n",
__func__, page_count);
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
index 9dd06199afc9..5e6bae6fae50 100644
--- a/fs/qnx6/Makefile
+++ b/fs/qnx6/Makefile
@@ -5,3 +5,4 @@
obj-$(CONFIG_QNX6FS_FS) += qnx6.o
qnx6-objs := inode.o dir.o namei.o super_mmi.o
+ccflags-$(CONFIG_QNX6FS_DEBUG) += -DDEBUG
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 15b7d92ed60d..8d64bb5366bf 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -77,21 +77,20 @@ static int qnx6_dir_longfilename(struct inode *inode,
if (de->de_size != 0xff) {
/* error - long filename entries always have size 0xff
in direntry */
- printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
- de->de_size);
+ pr_err("invalid direntry size (%i).\n", de->de_size);
return 0;
}
lf = qnx6_longname(s, de, &page);
if (IS_ERR(lf)) {
- printk(KERN_ERR "qnx6:Error reading longname\n");
+ pr_err("Error reading longname\n");
return 0;
}
lf_size = fs16_to_cpu(sbi, lf->lf_size);
if (lf_size > QNX6_LONG_NAME_MAX) {
- QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
- printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
+ pr_debug("file %s\n", lf->lf_fname);
+ pr_err("Filename too long (%i)\n", lf_size);
qnx6_put_page(page);
return 0;
}
@@ -100,10 +99,10 @@ static int qnx6_dir_longfilename(struct inode *inode,
mmi 3g filesystem does not have that checksum */
if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
qnx6_lfile_checksum(lf->lf_fname, lf_size))
- printk(KERN_INFO "qnx6: long filename checksum error.\n");
+ pr_info("long filename checksum error.\n");
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
- lf_size, lf->lf_fname, de_inode));
+ pr_debug("qnx6_readdir:%.*s inode:%u\n",
+ lf_size, lf->lf_fname, de_inode);
if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
qnx6_put_page(page);
return 0;
@@ -136,7 +135,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
int i = start;
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6_readdir: read failed\n");
+ pr_err("%s(): read failed\n", __func__);
ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
return PTR_ERR(page);
}
@@ -159,9 +158,9 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
break;
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
- " inode:%u\n", size, de->de_fname,
- no_inode));
+ pr_debug("%s():%.*s inode:%u\n",
+ __func__, size, de->de_fname,
+ no_inode);
if (!dir_emit(ctx, de->de_fname, size,
no_inode, DT_UNKNOWN)) {
done = true;
@@ -259,8 +258,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
if (ino)
goto found;
} else
- printk(KERN_ERR "qnx6: undefined "
- "filename size in inode.\n");
+ pr_err("undefined filename size in inode.\n");
}
qnx6_put_page(page);
}
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 65cdaab3ed49..44e73923670d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -73,8 +73,8 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
{
unsigned phys;
- QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
- inode->i_ino, (unsigned long)iblock));
+ pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
+ inode->i_ino, (unsigned long)iblock);
phys = qnx6_block_map(inode, iblock);
if (phys) {
@@ -87,7 +87,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
static int qnx6_check_blockptr(__fs32 ptr)
{
if (ptr == ~(__fs32)0) {
- printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
+ pr_err("hit unused blockpointer.\n");
return 0;
}
return 1;
@@ -127,8 +127,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
levelptr = no >> bitdelta;
if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
- printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
- no);
+ pr_err("Requested file block number (%u) too big.", no);
return 0;
}
@@ -137,8 +136,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
for (i = 0; i < depth; i++) {
bh = sb_bread(s, block);
if (!bh) {
- printk(KERN_ERR "qnx6:Error reading block (%u)\n",
- block);
+ pr_err("Error reading block (%u)\n", block);
return 0;
}
bitdelta -= ptrbits;
@@ -207,26 +205,16 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
{
struct qnx6_sb_info *sbi = QNX6_SB(s);
- QNX6DEBUG((KERN_INFO "magic: %08x\n",
- fs32_to_cpu(sbi, sb->sb_magic)));
- QNX6DEBUG((KERN_INFO "checksum: %08x\n",
- fs32_to_cpu(sbi, sb->sb_checksum)));
- QNX6DEBUG((KERN_INFO "serial: %llx\n",
- fs64_to_cpu(sbi, sb->sb_serial)));
- QNX6DEBUG((KERN_INFO "flags: %08x\n",
- fs32_to_cpu(sbi, sb->sb_flags)));
- QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
- fs32_to_cpu(sbi, sb->sb_blocksize)));
- QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_inodes)));
- QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_inodes)));
- QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_blocks)));
- QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_blocks)));
- QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
- sb->Inode.levels));
+ pr_debug("magic: %08x\n", fs32_to_cpu(sbi, sb->sb_magic));
+ pr_debug("checksum: %08x\n", fs32_to_cpu(sbi, sb->sb_checksum));
+ pr_debug("serial: %llx\n", fs64_to_cpu(sbi, sb->sb_serial));
+ pr_debug("flags: %08x\n", fs32_to_cpu(sbi, sb->sb_flags));
+ pr_debug("blocksize: %08x\n", fs32_to_cpu(sbi, sb->sb_blocksize));
+ pr_debug("num_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_num_inodes));
+ pr_debug("free_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_free_inodes));
+ pr_debug("num_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_num_blocks));
+ pr_debug("free_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_free_blocks));
+ pr_debug("inode_levels: %02x\n", sb->Inode.levels);
}
#endif
@@ -277,7 +265,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
start with the first superblock */
bh = sb_bread(s, offset);
if (!bh) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
return NULL;
}
sb = (struct qnx6_super_block *)bh->b_data;
@@ -285,20 +273,16 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
sbi->s_bytesex = BYTESEX_BE;
if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
/* we got a big endian fs */
- QNX6DEBUG((KERN_INFO "qnx6: fs got different"
- " endianness.\n"));
+ pr_debug("fs got different endianness.\n");
return bh;
} else
sbi->s_bytesex = BYTESEX_LE;
if (!silent) {
if (offset == 0) {
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
} else {
- printk(KERN_INFO "qnx6: wrong signature (magic)"
- " at position (0x%lx) - will try"
- " alternative position (0x0000).\n",
- offset * s->s_blocksize);
+ pr_info("wrong signature (magic) at position (0x%lx) - will try alternative position (0x0000).\n",
+ offset * s->s_blocksize);
}
}
brelse(bh);
@@ -329,13 +313,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* Superblock always is 512 Byte long */
if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto outnobh;
}
/* parse the mount-options */
if (!qnx6_parse_options((char *) data, s)) {
- printk(KERN_ERR "qnx6: invalid mount options.\n");
+ pr_err("invalid mount options.\n");
goto outnobh;
}
if (test_opt(s, MMI_FS)) {
@@ -355,7 +339,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* try again without bootblock offset */
bh1 = qnx6_check_first_superblock(s, 0, silent);
if (!bh1) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
goto outnobh;
}
/* seems that no bootblock at partition start */
@@ -370,13 +354,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -398,21 +382,20 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* next the second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
+ pr_err("superblock #2 checksum error\n");
goto out;
}
@@ -422,25 +405,24 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
mmi_success:
/* sanity check - limit maximum indirect pointer levels */
if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
+ pr_err("too many inode levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
goto out;
}
if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many longfilename levels"
- " (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
+ pr_err("too many longfilename levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
goto out;
}
s->s_op = &qnx6_sops;
@@ -460,7 +442,7 @@ mmi_success:
/* prefetch root inode */
root = qnx6_iget(s, QNX6_ROOT_INO);
if (IS_ERR(root)) {
- printk(KERN_ERR "qnx6: get inode failed\n");
+ pr_err("get inode failed\n");
ret = PTR_ERR(root);
goto out2;
}
@@ -474,7 +456,7 @@ mmi_success:
errmsg = qnx6_checkroot(s);
if (errmsg != NULL) {
if (!silent)
- printk(KERN_ERR "qnx6: %s\n", errmsg);
+ pr_err("%s\n", errmsg);
goto out3;
}
return 0;
@@ -555,8 +537,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mode = 0;
if (ino == 0) {
- printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
- "out of range\n",
+ pr_err("bad inode number on dev %s: %u is out of range\n",
sb->s_id, ino);
iget_failed(inode);
return ERR_PTR(-EIO);
@@ -566,8 +547,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
mapping = sbi->inodes->i_mapping;
page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6: major problem: unable to read inode from "
- "dev %s\n", sb->s_id);
+ pr_err("major problem: unable to read inode from dev %s\n",
+ sb->s_id);
iget_failed(inode);
return ERR_CAST(page);
}
@@ -689,7 +670,7 @@ static int __init init_qnx6_fs(void)
return err;
}
- printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
+ pr_info("QNX6 filesystem 1.0.0 registered.\n");
return 0;
}
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 0561326a94f5..6c1a323137dd 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,12 +29,12 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
foundinode = qnx6_iget(dir->i_sb, ino);
qnx6_put_page(page);
if (IS_ERR(foundinode)) {
- QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
- " error %ld\n", PTR_ERR(foundinode)));
+ pr_debug("lookup->iget -> error %ld\n",
+ PTR_ERR(foundinode));
return ERR_CAST(foundinode);
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
+ pr_debug("%s(): not found %s\n", __func__, name);
return NULL;
}
d_add(dentry, foundinode);
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index b00fcc960d37..d3fb2b698800 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -10,6 +10,12 @@
*
*/
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -19,12 +25,6 @@ typedef __u64 __bitwise __fs64;
#include <linux/qnx6_fs.h>
-#ifdef CONFIG_QNX6FS_DEBUG
-#define QNX6DEBUG(X) printk X
-#else
-#define QNX6DEBUG(X) (void) 0
-#endif
-
struct qnx6_sb_info {
struct buffer_head *sb_buf; /* superblock buffer */
struct qnx6_super_block *sb; /* our superblock */
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
index 29c32cba62d6..62aaf3e3126a 100644
--- a/fs/qnx6/super_mmi.c
+++ b/fs/qnx6/super_mmi.c
@@ -44,15 +44,14 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
start with the first superblock */
bh1 = sb_bread(s, 0);
if (!bh1) {
- printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
+ pr_err("Unable to read first mmi superblock\n");
return NULL;
}
sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
sbi = QNX6_SB(s);
if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent) {
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
goto out;
}
}
@@ -60,7 +59,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
@@ -70,7 +69,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -83,27 +82,26 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* read second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum)
!= crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
if (!qsb) {
- printk(KERN_ERR "qnx6: unable to allocate memory.\n");
+ pr_err("unable to allocate memory.\n");
goto out;
}
@@ -119,7 +117,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
qnx6_mmi_copy_sb(qsb, sb2);
@@ -131,7 +129,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
kfree(qsb);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9cd5f63715c0..7f30bdc57d13 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -702,6 +702,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
struct dquot *dquot;
unsigned long freed = 0;
+ spin_lock(&dq_list_lock);
head = free_dquots.prev;
while (head != &free_dquots && sc->nr_to_scan) {
dquot = list_entry(head, struct dquot, dq_free);
@@ -713,6 +714,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
freed++;
head = free_dquots.prev;
}
+ spin_unlock(&dq_list_lock);
return freed;
}
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index dda012ad4208..bbafbde3471a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -222,7 +222,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
/* gang-find the pages */
ret = -ENOMEM;
- pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
+ pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out_free;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d9f5a60dd59b..0a7dc941aaf4 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -9,7 +9,7 @@
#include <linux/stat.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
extern const struct reiserfs_key MIN_KEY;
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 54fdf196bfb2..5739cb99de7b 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -10,7 +10,7 @@
* and using buffers obtained after all above.
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/time.h>
#include "reiserfs.h"
#include <linux/buffer_head.h>
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index db9e80ba53a0..751dd3f4346b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -6,7 +6,7 @@
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/writeback.h>
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 73231b1ebdbe..b751eea32e20 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 63b2b0ec49e6..a7eec9888f10 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -11,7 +11,7 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 501ed6811a2b..6ec8a30a0911 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -7,7 +7,7 @@
#include <linux/mount.h>
#include "reiserfs.h"
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/compat.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index cfaee912ee09..aca73dd73906 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -54,7 +54,7 @@ static void sd_print_item(struct item_head *ih, char *item)
} else {
struct stat_data *sd = (struct stat_data *)item;
- printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
+ printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
(unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
}
@@ -408,7 +408,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
namebuf[namelen + 2] = 0;
}
- printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
+ printk("%d: %-15s%-15d%-15d%-15lld%-15lld(%s)\n",
i, namebuf,
deh_dir_id(deh), deh_objectid(deh),
GET_HASH_VALUE(deh_offset(deh)),
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index d6744c8b24e1..814dda3ec998 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c9b47e91baf8..ae1dc841db3a 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -17,7 +17,7 @@ static char off_buf[80];
static char *reiserfs_cpu_offset(struct cpu_key *key)
{
if (cpu_key_k_type(key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(cpu_key_k_offset(key)),
(unsigned long long)
@@ -34,7 +34,7 @@ static char *le_offset(struct reiserfs_key *key)
version = le_key_version(key);
if (le_key_k_type(version, key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(le_key_k_offset(version, key)),
(unsigned long long)
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 02b0b7d0f7d5..621b9f381fe1 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/time.h>
#include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include <linux/init.h>
#include <linux/proc_fs.h>
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index dd44468edc2b..24cbe013240f 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2006,7 +2006,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
&s_search_path) == POSITION_FOUND);
RFALSE(file_size > ROUND_UP(new_file_size),
- "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
+ "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
update_and_out:
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a392cef6acc6..709ea92d716f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -15,7 +15,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
@@ -331,7 +331,7 @@ static int finish_unfinished(struct super_block *s)
* not completed truncate found. New size was
* committed together with "save" link
*/
- reiserfs_info(s, "Truncating %k to %Ld ..",
+ reiserfs_info(s, "Truncating %k to %lld ..",
INODE_PKEY(inode), inode->i_size);
/* don't update modification time */
@@ -1577,7 +1577,7 @@ static int read_super_block(struct super_block *s, int offset)
rs = (struct reiserfs_super_block *)bh->b_data;
if (sb_blocksize(rs) != s->s_blocksize) {
reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
- "filesystem on (dev %s, block %Lu, size %lu)",
+ "filesystem on (dev %s, block %llu, size %lu)",
s->s_id,
(unsigned long long)bh->b_blocknr,
s->s_blocksize);
@@ -2441,8 +2441,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
struct buffer_head tmp_bh, *bh;
if (!current->journal_info) {
- printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
- " cancelled because transaction is not started.\n",
+ printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
return -EIO;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ca416d099e7d..7c36898af402 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -45,7 +45,7 @@
#include <linux/xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/checksum.h>
#include <linux/stat.h>
#include <linux/quotaops.h>
@@ -84,6 +84,7 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -98,6 +99,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -117,6 +119,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
{
struct dentry *privroot = REISERFS_SB(sb)->priv_root;
struct dentry *xaroot;
+
if (!privroot->d_inode)
return ERR_PTR(-ENODATA);
@@ -127,6 +130,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
xaroot = ERR_PTR(-ENODATA);
else if (!xaroot->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
if (err) {
@@ -157,6 +161,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && !xadir->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
if (err) {
@@ -188,6 +193,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
{
struct reiserfs_dentry_buf *dbuf = buf;
struct dentry *dentry;
+
WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
@@ -218,6 +224,7 @@ static void
cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
{
int i;
+
for (i = 0; i < buf->count; i++)
if (buf->dentries[i])
dput(buf->dentries[i]);
@@ -283,11 +290,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+
reiserfs_write_lock(inode->i_sb);
err = journal_begin(&th, inode->i_sb, blocks);
reiserfs_write_unlock(inode->i_sb);
if (!err) {
int jerror;
+
mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
I_MUTEX_XATTR);
err = action(dir, data);
@@ -340,6 +349,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
int reiserfs_delete_xattrs(struct inode *inode)
{
int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20004",
"Couldn't delete all xattrs (%d)\n", err);
@@ -350,6 +360,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
{
int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20007",
"Couldn't chown all xattrs (%d)\n", err);
@@ -439,6 +450,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
static void update_ctime(struct inode *inode)
{
struct timespec now = current_fs_time(inode->i_sb);
+
if (inode_unhashed(inode) || !inode->i_nlink ||
timespec_equal(&inode->i_ctime, &now))
return;
@@ -514,6 +526,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
size_t chunk;
size_t skip = 0;
size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+
if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -530,6 +543,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
if (file_pos == 0) {
struct reiserfs_xattr_header *rxh;
+
skip = file_pos = sizeof(struct reiserfs_xattr_header);
if (chunk + skip > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE - skip;
@@ -659,6 +673,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
size_t chunk;
char *data;
size_t skip = 0;
+
if (isize - file_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -792,6 +807,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
int reiserfs_removexattr(struct dentry *dentry, const char *name)
{
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -813,9 +829,11 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
{
struct listxattr_buf *b = (struct listxattr_buf *)buf;
size_t size;
+
if (name[0] != '.' ||
(namelen != 1 && (name[1] != '.' || namelen != 2))) {
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
if (!handler) /* Unsupported xattr name */
@@ -885,6 +903,7 @@ static int create_privroot(struct dentry *dentry)
{
int err;
struct inode *inode = dentry->d_parent->d_inode;
+
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
err = xattr_mkdir(inode, dentry, 0700);
@@ -1015,6 +1034,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
mutex_lock(&privroot->d_inode->i_mutex);
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
+
dentry = lookup_one_len(XAROOT_NAME, privroot,
strlen(XAROOT_NAME));
if (!IS_ERR(dentry))
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44503e293790..4b34b9dc03dd 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -9,7 +9,7 @@
#include <linux/posix_acl_xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
struct inode *inode, int type,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 800a3cef6f62..e7f8939a4cb5 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,7 +6,7 @@
#include <linux/slab.h>
#include "xattr.h"
#include <linux/security.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a0035719f66b..5eeb0c48ba46 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -5,7 +5,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 8667491ae7c3..e50eab046471 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -4,7 +4,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index ef90e8bca95a..e98dd88197d5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -56,6 +56,8 @@
* 2 of the Licence, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
@@ -380,7 +382,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
eio:
ret = -EIO;
error:
- printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos);
+ pr_err("read error for inode 0x%lx\n", pos);
return ERR_PTR(ret);
}
@@ -390,6 +392,7 @@ error:
static struct inode *romfs_alloc_inode(struct super_block *sb)
{
struct romfs_inode_info *inode;
+
inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
return inode ? &inode->vfs_inode : NULL;
}
@@ -400,6 +403,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
static void romfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
+
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
@@ -507,15 +511,13 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 ||
img_size < ROMFH_SIZE) {
if (!silent)
- printk(KERN_WARNING "VFS:"
- " Can't find a romfs filesystem on dev %s.\n",
+ pr_warn("VFS: Can't find a romfs filesystem on dev %s.\n",
sb->s_id);
goto error_rsb_inval;
}
if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) {
- printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n",
- sb->s_id);
+ pr_err("bad initial checksum on dev %s.\n", sb->s_id);
goto error_rsb_inval;
}
@@ -523,8 +525,8 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
len = strnlen(rsb->name, ROMFS_MAXFN);
if (!silent)
- printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n",
- (unsigned) len, (unsigned) len, rsb->name, storage);
+ pr_notice("Mounting image '%*.*s' through %s\n",
+ (unsigned) len, (unsigned) len, rsb->name, storage);
kfree(rsb);
rsb = NULL;
@@ -614,7 +616,7 @@ static int __init init_romfs_fs(void)
{
int ret;
- printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n");
+ pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n");
romfs_inode_cachep =
kmem_cache_create("romfs_i",
@@ -623,13 +625,12 @@ static int __init init_romfs_fs(void)
romfs_i_init_once);
if (!romfs_inode_cachep) {
- printk(KERN_ERR
- "ROMFS error: Failed to initialise inode cache\n");
+ pr_err("Failed to initialise inode cache\n");
return -ENOMEM;
}
ret = register_filesystem(&romfs_fs_type);
if (ret) {
- printk(KERN_ERR "ROMFS error: Failed to register filesystem\n");
+ pr_err("Failed to register filesystem\n");
goto error_register;
}
return 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1d641bb108d2..3857b720cb1b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,8 +8,10 @@
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/cred.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m)
m->count = m->size;
}
+static void *seq_buf_alloc(unsigned long size)
+{
+ void *buf;
+
+ buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!buf && size > PAGE_SIZE)
+ buf = vmalloc(size);
+ return buf;
+}
+
/**
* seq_open - initialize sequential file
* @file: file we initialize
@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
return 0;
}
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
return -ENOMEM;
}
@@ -135,9 +147,9 @@ static int traverse(struct seq_file *m, loff_t offset)
Eoverflow:
m->op->stop(m, p);
- kfree(m->buf);
+ kvfree(m->buf);
m->count = 0;
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size <<= 1);
return !m->buf ? -ENOMEM : -EAGAIN;
}
@@ -192,7 +204,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* grab buffer if we didn't have one */
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
goto Enomem;
}
@@ -232,9 +244,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
if (m->count < m->size)
goto Fill;
m->op->stop(m, p);
- kfree(m->buf);
+ kvfree(m->buf);
m->count = 0;
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size <<= 1);
if (!m->buf)
goto Enomem;
m->version = 0;
@@ -350,7 +362,7 @@ EXPORT_SYMBOL(seq_lseek);
int seq_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
- kfree(m->buf);
+ kvfree(m->buf);
kfree(m);
return 0;
}
@@ -605,13 +617,13 @@ EXPORT_SYMBOL(single_open);
int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
void *data, size_t size)
{
- char *buf = kmalloc(size, GFP_KERNEL);
+ char *buf = seq_buf_alloc(size);
int ret;
if (!buf)
return -ENOMEM;
ret = single_open(file, show, data);
if (ret) {
- kfree(buf);
+ kvfree(buf);
return ret;
}
((struct seq_file *)file->private_data)->buf = buf;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 62a0de6632e1..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
pages = end_index - start_index + 1;
- page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+ page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
if (page == NULL)
return res;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 031c8d67fd51..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
* the filesystem.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
return err;
}
- printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
- "Phillip Lougher\n");
+ pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
return 0;
}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 0013142c0475..80c350216ea8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -35,8 +35,9 @@ struct timerfd_ctx {
ktime_t moffs;
wait_queue_head_t wqh;
u64 ticks;
- int expired;
int clockid;
+ short unsigned expired;
+ short unsigned settime_flags; /* to show in fdinfo */
struct rcu_head rcu;
struct list_head clist;
bool might_cancel;
@@ -92,7 +93,7 @@ static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
*/
void timerfd_clock_was_set(void)
{
- ktime_t moffs = ktime_get_monotonic_offset();
+ ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
struct timerfd_ctx *ctx;
unsigned long flags;
@@ -125,7 +126,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
{
if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
return false;
- ctx->moffs = ktime_get_monotonic_offset();
+ ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
return true;
}
@@ -196,6 +197,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
if (timerfd_canceled(ctx))
return -ECANCELED;
}
+
+ ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
return 0;
}
@@ -284,11 +287,77 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
return res;
}
+#ifdef CONFIG_PROC_FS
+static int timerfd_show(struct seq_file *m, struct file *file)
+{
+ struct timerfd_ctx *ctx = file->private_data;
+ struct itimerspec t;
+
+ spin_lock_irq(&ctx->wqh.lock);
+ t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
+ t.it_interval = ktime_to_timespec(ctx->tintv);
+ spin_unlock_irq(&ctx->wqh.lock);
+
+ return seq_printf(m,
+ "clockid: %d\n"
+ "ticks: %llu\n"
+ "settime flags: 0%o\n"
+ "it_value: (%llu, %llu)\n"
+ "it_interval: (%llu, %llu)\n",
+ ctx->clockid, (unsigned long long)ctx->ticks,
+ ctx->settime_flags,
+ (unsigned long long)t.it_value.tv_sec,
+ (unsigned long long)t.it_value.tv_nsec,
+ (unsigned long long)t.it_interval.tv_sec,
+ (unsigned long long)t.it_interval.tv_nsec);
+}
+#else
+#define timerfd_show NULL
+#endif
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct timerfd_ctx *ctx = file->private_data;
+ int ret = 0;
+
+ switch (cmd) {
+ case TFD_IOC_SET_TICKS: {
+ u64 ticks;
+
+ if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
+ return -EFAULT;
+ if (!ticks)
+ return -EINVAL;
+
+ spin_lock_irq(&ctx->wqh.lock);
+ if (!timerfd_canceled(ctx)) {
+ ctx->ticks = ticks;
+ if (ticks)
+ wake_up_locked(&ctx->wqh);
+ } else
+ ret = -ECANCELED;
+ spin_unlock_irq(&ctx->wqh.lock);
+ break;
+ }
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+
+ return ret;
+}
+#else
+#define timerfd_ioctl NULL
+#endif
+
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
.poll = timerfd_poll,
.read = timerfd_read,
.llseek = noop_llseek,
+ .show_fdinfo = timerfd_show,
+ .unlocked_ioctl = timerfd_ioctl,
};
static int timerfd_fget(int fd, struct fd *p)
@@ -336,7 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
else
hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
- ctx->moffs = ktime_get_monotonic_offset();
+ ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index dd39980437fc..4d0e02b022b3 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UFS_FS) += ufs.o
ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
namei.o super.o symlink.o truncate.o util.o
+ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 61e8a9b021dd..7c580c97990e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -158,16 +158,16 @@ out:
/**
* ufs_inode_getfrag() - allocate new fragment(s)
- * @inode - pointer to inode
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @fragment: number of `fragment' which hold pointer
* to new allocated fragment(s)
- * @new_fragment - number of new allocated fragment(s)
- * @required - how many fragment(s) we require
- * @err - we set it if something wrong
- * @phys - pointer to where we save physical number of new allocated fragments,
+ * @new_fragment: number of new allocated fragment(s)
+ * @required: how many fragment(s) we require
+ * @err: we set it if something wrong
+ * @phys: pointer to where we save physical number of new allocated fragments,
* NULL if we allocate not data(indirect blocks for example).
- * @new - we set it if we allocate new block
- * @locked_page - for ufs_new_fragments()
+ * @new: we set it if we allocate new block
+ * @locked_page: for ufs_new_fragments()
*/
static struct buffer_head *
ufs_inode_getfrag(struct inode *inode, u64 fragment,
@@ -315,16 +315,16 @@ repeat2:
/**
* ufs_inode_getblock() - allocate new block
- * @inode - pointer to inode
- * @bh - pointer to block which hold "pointer" to new allocated block
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @bh: pointer to block which hold "pointer" to new allocated block
+ * @fragment: number of `fragment' which hold pointer
* to new allocated block
- * @new_fragment - number of new allocated fragment
+ * @new_fragment: number of new allocated fragment
* (block will hold this fragment and also uspi->s_fpb-1)
- * @err - see ufs_inode_getfrag()
- * @phys - see ufs_inode_getfrag()
- * @new - see ufs_inode_getfrag()
- * @locked_page - see ufs_inode_getfrag()
+ * @err: see ufs_inode_getfrag()
+ * @phys: see ufs_inode_getfrag()
+ * @new: see ufs_inode_getfrag()
+ * @locked_page: see ufs_inode_getfrag()
*/
static struct buffer_head *
ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b879f1ba3439..da73801301d5 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -65,7 +65,6 @@
* Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
-
#include <linux/exportfs.h>
#include <linux/module.h>
#include <linux/bitops.h>
@@ -172,73 +171,73 @@ static void ufs_print_super_stuff(struct super_block *sb,
{
u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
- printk("ufs_print_super_stuff\n");
- printk(" magic: 0x%x\n", magic);
+ pr_debug("ufs_print_super_stuff\n");
+ pr_debug(" magic: 0x%x\n", magic);
if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
- printk(" fs_size: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
- printk(" fs_dsize: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
- printk(" bsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
- printk(" fs_sblockloc: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
- printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
- printk(" cs_nbfree(No of free blocks): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
- printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
- printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
- printk(KERN_INFO" fs_maxsymlinklen: %u\n",
- fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
+ pr_debug(" fs_size: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
+ pr_debug(" fs_dsize: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
+ pr_debug(" bsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
+ pr_debug(" fs_sblockloc: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
+ pr_debug(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
+ pr_debug(" cs_nbfree(No of free blocks): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
+ pr_info(" cs_nifree(Num of free inodes): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
+ pr_info(" cs_nffree(Num of free frags): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
+ pr_info(" fs_maxsymlinklen: %u\n",
+ fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
} else {
- printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
- printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
- printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
- printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
- printk(" cgoffset: %u\n",
- fs32_to_cpu(sb, usb1->fs_cgoffset));
- printk(" ~cgmask: 0x%x\n",
- ~fs32_to_cpu(sb, usb1->fs_cgmask));
- printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
- printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
- printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
- printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
- printk(" fragshift: %u\n",
- fs32_to_cpu(sb, usb1->fs_fragshift));
- printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
- printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
- printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
- printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
- printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
- printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
- printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
- printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
- printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
- printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
- printk(" fstodb: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsbtodb));
- printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
- printk(" ndir %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
- printk(" nifree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
- printk(" nbfree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
- printk(" nffree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
+ pr_debug(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
+ pr_debug(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
+ pr_debug(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
+ pr_debug(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
+ pr_debug(" cgoffset: %u\n",
+ fs32_to_cpu(sb, usb1->fs_cgoffset));
+ pr_debug(" ~cgmask: 0x%x\n",
+ ~fs32_to_cpu(sb, usb1->fs_cgmask));
+ pr_debug(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
+ pr_debug(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
+ pr_debug(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
+ pr_debug(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
+ pr_debug(" fragshift: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fragshift));
+ pr_debug(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
+ pr_debug(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
+ pr_debug(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
+ pr_debug(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
+ pr_debug(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
+ pr_debug(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
+ pr_debug(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
+ pr_debug(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
+ pr_debug(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
+ pr_debug(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
+ pr_debug(" fstodb: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsbtodb));
+ pr_debug(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
+ pr_debug(" ndir %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
+ pr_debug(" nifree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
+ pr_debug(" nbfree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
+ pr_debug(" nffree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
}
- printk("\n");
+ pr_debug("\n");
}
/*
@@ -247,38 +246,38 @@ static void ufs_print_super_stuff(struct super_block *sb,
static void ufs_print_cylinder_stuff(struct super_block *sb,
struct ufs_cylinder_group *cg)
{
- printk("\nufs_print_cylinder_stuff\n");
- printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
- printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
- printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
- printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
- printk(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
- printk(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
- printk(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
- printk(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
- printk(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
- printk(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
- printk(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
- printk(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
- printk(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
- printk(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
- printk(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
+ pr_debug("\nufs_print_cylinder_stuff\n");
+ pr_debug("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
+ pr_debug(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
+ pr_debug(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
+ pr_debug(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
+ pr_debug(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
+ pr_debug(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
+ pr_debug(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
+ pr_debug(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
+ pr_debug(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
+ pr_debug(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
+ pr_debug(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
+ pr_debug(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
+ pr_debug(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
+ pr_debug(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
+ pr_debug(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]),
fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]),
fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]),
fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7]));
- printk(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
- printk(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
- printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
- printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
- printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
- printk(" clustersumoff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
- printk(" clusteroff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
- printk(" nclusterblks %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
- printk("\n");
+ pr_debug(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
+ pr_debug(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
+ pr_debug(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
+ pr_debug(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
+ pr_debug(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
+ pr_debug(" clustersumoff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
+ pr_debug(" clusteroff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
+ pr_debug(" nclusterblks %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
+ pr_debug("\n");
}
#else
# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
@@ -287,13 +286,12 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
static const struct super_operations ufs_super_ops;
-static char error_buf[1024];
-
void ufs_error (struct super_block * sb, const char * function,
const char * fmt, ...)
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -305,20 +303,21 @@ void ufs_error (struct super_block * sb, const char * function,
ufs_mark_sb_dirty(sb);
sb->s_flags |= MS_RDONLY;
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
case UFS_MOUNT_ONERROR_PANIC:
- panic ("UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ panic("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
case UFS_MOUNT_ONERROR_LOCK:
case UFS_MOUNT_ONERROR_UMOUNT:
case UFS_MOUNT_ONERROR_REPAIR:
- printk (KERN_CRIT "UFS-fs error (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
- }
+ pr_crit("error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ }
+ va_end(args);
}
void ufs_panic (struct super_block * sb, const char * function,
@@ -326,6 +325,7 @@ void ufs_panic (struct super_block * sb, const char * function,
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -336,24 +336,27 @@ void ufs_panic (struct super_block * sb, const char * function,
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_mark_sb_dirty(sb);
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
sb->s_flags |= MS_RDONLY;
- printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ pr_crit("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
void ufs_warning (struct super_block * sb, const char * function,
const char * fmt, ...)
{
+ struct va_format vaf;
va_list args;
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
- printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_warn("(device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
enum {
@@ -464,14 +467,12 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
ufs_set_opt (*mount_options, ONERROR_UMOUNT);
break;
case Opt_onerror_repair:
- printk("UFS-fs: Unable to do repair on error, "
- "will lock lock instead\n");
+ pr_err("Unable to do repair on error, will lock lock instead\n");
ufs_clear_opt (*mount_options, ONERROR);
ufs_set_opt (*mount_options, ONERROR_REPAIR);
break;
default:
- printk("UFS-fs: Invalid option: \"%s\" "
- "or missing value\n", p);
+ pr_err("Invalid option: \"%s\" or missing value\n", p);
return 0;
}
}
@@ -788,8 +789,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
#ifndef CONFIG_UFS_FS_WRITE
if (!(sb->s_flags & MS_RDONLY)) {
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
return -EROFS;
}
#endif
@@ -812,12 +812,12 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_mount_opt = 0;
ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
- printk("wrong mount options\n");
+ pr_err("wrong mount options\n");
goto failed;
}
if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
if (!silent)
- printk("You didn't specify the type of your ufs filesystem\n\n"
+ pr_err("You didn't specify the type of your ufs filesystem\n\n"
"mount -t ufs -o ufstype="
"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
@@ -868,7 +868,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
break;
case UFS_MOUNT_UFSTYPE_SUNOS:
- UFSD(("ufstype=sunos\n"))
+ UFSD("ufstype=sunos\n");
uspi->s_fsize = block_size = 1024;
uspi->s_fmask = ~(1024 - 1);
uspi->s_fshift = 10;
@@ -900,7 +900,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=old is supported read-only\n");
+ pr_info("ufstype=old is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -916,7 +916,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep is supported read-only\n");
+ pr_info("ufstype=nextstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -932,7 +932,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep-cd is supported read-only\n");
+ pr_info("ufstype=nextstep-cd is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -948,7 +948,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=openstep is supported read-only\n");
+ pr_info("ufstype=openstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -963,19 +963,19 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=hp is supported read-only\n");
+ pr_info("ufstype=hp is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
default:
if (!silent)
- printk("unknown ufstype\n");
+ pr_err("unknown ufstype\n");
goto failed;
}
again:
if (!sb_set_blocksize(sb, block_size)) {
- printk(KERN_ERR "UFS: failed to set blocksize\n");
+ pr_err("failed to set blocksize\n");
goto failed;
}
@@ -1034,7 +1034,7 @@ again:
goto again;
}
if (!silent)
- printk("ufs_read_super: bad magic number\n");
+ pr_err("%s(): bad magic number\n", __func__);
goto failed;
magic_found:
@@ -1048,33 +1048,33 @@ magic_found:
uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
if (!is_power_of_2(uspi->s_fsize)) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n",
- uspi->s_fsize);
- goto failed;
+ pr_err("%s(): fragment size %u is not a power of 2\n",
+ __func__, uspi->s_fsize);
+ goto failed;
}
if (uspi->s_fsize < 512) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too small\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too small\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize > 4096) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too large\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too large\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (!is_power_of_2(uspi->s_bsize)) {
- printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is not a power of 2\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize < 4096) {
- printk(KERN_ERR "ufs_read_super: block size %u is too small\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is too small\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize / uspi->s_fsize > 8) {
- printk(KERN_ERR "ufs_read_super: too many fragments per block (%u)\n",
- uspi->s_bsize / uspi->s_fsize);
+ pr_err("%s(): too many fragments per block (%u)\n",
+ __func__, uspi->s_bsize / uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) {
@@ -1113,20 +1113,21 @@ magic_found:
UFSD("fs is DEC OSF/1\n");
break;
case UFS_FSACTIVE:
- printk("ufs_read_super: fs is active\n");
+ pr_err("%s(): fs is active\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
case UFS_FSBAD:
- printk("ufs_read_super: fs is bad\n");
+ pr_err("%s(): fs is bad\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
default:
- printk("ufs_read_super: can't grok fs_clean 0x%x\n", usb1->fs_clean);
+ pr_err("%s(): can't grok fs_clean 0x%x\n",
+ __func__, usb1->fs_clean);
sb->s_flags |= MS_RDONLY;
break;
}
} else {
- printk("ufs_read_super: fs needs fsck\n");
+ pr_err("%s(): fs needs fsck\n", __func__);
sb->s_flags |= MS_RDONLY;
}
@@ -1299,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
new_mount_opt |= ufstype;
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
- printk("ufstype can't be changed during remount\n");
+ pr_err("ufstype can't be changed during remount\n");
unlock_ufs(sb);
return -EINVAL;
}
@@ -1328,8 +1329,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
* fs was mounted as ro, remounting rw
*/
#ifndef CONFIG_UFS_FS_WRITE
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
unlock_ufs(sb);
return -EINVAL;
#else
@@ -1338,12 +1338,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
- printk("this ufstype is read-only supported\n");
+ pr_err("this ufstype is read-only supported\n");
unlock_ufs(sb);
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
- printk("failed during remounting\n");
+ pr_err("failed during remounting\n");
unlock_ufs(sb);
return -EPERM;
}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 343e6fc571e5..2a07396d5f9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -1,6 +1,12 @@
#ifndef _UFS_UFS_H
#define _UFS_UFS_H 1
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#define UFS_MAX_GROUP_LOADED 8
#define UFS_CGNO_EMPTY ((unsigned)-1)
@@ -71,9 +77,9 @@ struct ufs_inode_info {
*/
#ifdef CONFIG_UFS_DEBUG
# define UFSD(f, a...) { \
- printk ("UFSD (%s, %d): %s:", \
+ pr_debug("UFSD (%s, %d): %s:", \
__FILE__, __LINE__, __func__); \
- printk (f, ## a); \
+ pr_debug(f, ## a); \
}
#else
# define UFSD(f, a...) /**/
diff --git a/fs/xattr.c b/fs/xattr.c
index 3377dff18404..c69e6d43a0d2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -843,7 +843,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
/* wrap around? */
len = sizeof(*new_xattr) + size;
- if (len <= sizeof(*new_xattr))
+ if (len < sizeof(*new_xattr))
return NULL;
new_xattr = kmalloc(len, GFP_KERNEL);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 96175df211b1..75c3fe5f3d9d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4298,8 +4298,8 @@ xfs_bmapi_delay(
}
-int
-__xfs_bmapi_allocate(
+static int
+xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
@@ -4578,9 +4578,6 @@ xfs_bmapi_write(
bma.flist = flist;
bma.firstblock = firstblock;
- if (flags & XFS_BMAPI_STACK_SWITCH)
- bma.stack_switch = 1;
-
while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 38ba36e9b2f0..b879ca56a64c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
-#define XFS_BMAPI_STACK_SWITCH 0x080
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
- { XFS_BMAPI_CONVERT, "CONVERT" }, \
- { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
+ { XFS_BMAPI_CONVERT, "CONVERT" }
static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 703b3ec1796c..64731ef3324d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -249,59 +249,6 @@ xfs_bmap_rtalloc(
}
/*
- * Stack switching interfaces for allocation
- */
-static void
-xfs_bmapi_allocate_worker(
- struct work_struct *work)
-{
- struct xfs_bmalloca *args = container_of(work,
- struct xfs_bmalloca, work);
- unsigned long pflags;
- unsigned long new_pflags = PF_FSTRANS;
-
- /*
- * we are in a transaction context here, but may also be doing work
- * in kswapd context, and hence we may need to inherit that state
- * temporarily to ensure that we don't block waiting for memory reclaim
- * in any way.
- */
- if (args->kswapd)
- new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
-
- current_set_flags_nested(&pflags, new_pflags);
-
- args->result = __xfs_bmapi_allocate(args);
- complete(args->done);
-
- current_restore_flags_nested(&pflags, new_pflags);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
- struct xfs_bmalloca *args)
-{
- DECLARE_COMPLETION_ONSTACK(done);
-
- if (!args->stack_switch)
- return __xfs_bmapi_allocate(args);
-
-
- args->done = &done;
- args->kswapd = current_is_kswapd();
- INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
- queue_work(xfs_alloc_wq, &args->work);
- wait_for_completion(&done);
- destroy_work_on_stack(&args->work);
- return args->result;
-}
-
-/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary. All offsets are considered outside
* the end of file for an empty fork, so 1 is returned in *eof in that case.
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 075f72232a64..2fdb72d2c908 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -55,8 +55,6 @@ struct xfs_bmalloca {
bool userdata;/* set if is user data */
bool aeof; /* allocated space at eof */
bool conv; /* overwriting unwritten extents */
- bool stack_switch;
- bool kswapd; /* allocation in kswapd context */
int flags;
struct completion *done;
struct work_struct work;
@@ -66,8 +64,6 @@ struct xfs_bmalloca {
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
-int xfs_bmapi_allocate(struct xfs_bmalloca *args);
-int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index bf810c6baf2b..cf893bc1e373 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -33,6 +33,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
+#include "xfs_alloc.h"
/*
* Cursor allocation zone.
@@ -2323,7 +2324,7 @@ error1:
* record (to be inserted into parent).
*/
STATIC int /* error */
-xfs_btree_split(
+__xfs_btree_split(
struct xfs_btree_cur *cur,
int level,
union xfs_btree_ptr *ptrp,
@@ -2503,6 +2504,85 @@ error0:
return error;
}
+struct xfs_btree_split_args {
+ struct xfs_btree_cur *cur;
+ int level;
+ union xfs_btree_ptr *ptrp;
+ union xfs_btree_key *key;
+ struct xfs_btree_cur **curp;
+ int *stat; /* success/failure */
+ int result;
+ bool kswapd; /* allocation in kswapd context */
+ struct completion *done;
+ struct work_struct work;
+};
+
+/*
+ * Stack switching interfaces for allocation
+ */
+static void
+xfs_btree_split_worker(
+ struct work_struct *work)
+{
+ struct xfs_btree_split_args *args = container_of(work,
+ struct xfs_btree_split_args, work);
+ unsigned long pflags;
+ unsigned long new_pflags = PF_FSTRANS;
+
+ /*
+ * we are in a transaction context here, but may also be doing work
+ * in kswapd context, and hence we may need to inherit that state
+ * temporarily to ensure that we don't block waiting for memory reclaim
+ * in any way.
+ */
+ if (args->kswapd)
+ new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+
+ current_set_flags_nested(&pflags, new_pflags);
+
+ args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
+ args->key, args->curp, args->stat);
+ complete(args->done);
+
+ current_restore_flags_nested(&pflags, new_pflags);
+}
+
+/*
+ * BMBT split requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. For the other
+ * btree types, just call directly to avoid the context switch overhead here.
+ */
+STATIC int /* error */
+xfs_btree_split(
+ struct xfs_btree_cur *cur,
+ int level,
+ union xfs_btree_ptr *ptrp,
+ union xfs_btree_key *key,
+ struct xfs_btree_cur **curp,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_split_args args;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ if (cur->bc_btnum != XFS_BTNUM_BMAP)
+ return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
+
+ args.cur = cur;
+ args.level = level;
+ args.ptrp = ptrp;
+ args.key = key;
+ args.curp = curp;
+ args.stat = stat;
+ args.done = &done;
+ args.kswapd = current_is_kswapd();
+ INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
+ queue_work(xfs_alloc_wq, &args.work);
+ wait_for_completion(&done);
+ destroy_work_on_stack(&args.work);
+ return args.result;
+}
+
+
/*
* Copy the old inode root contents into a real block and make the
* broot point to it.
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6c5eb4c551e3..6d3ec2b6ee29 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -749,8 +749,7 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
- count_fsb,
- XFS_BMAPI_STACK_SWITCH,
+ count_fsb, 0,
&first_block, 1,
imap, &nimaps, &free_list);
if (error)
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index c3453b11f563..7703fa6770ff 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
}
/*
- * GQUOTINO and PQUOTINO cannot be used together in versions
- * of superblock that do not have pquotino. from->sb_flags
- * tells us which quota is active and should be copied to
- * disk.
+ * GQUOTINO and PQUOTINO cannot be used together in versions of
+ * superblock that do not have pquotino. from->sb_flags tells us which
+ * quota is active and should be copied to disk. If neither are active,
+ * make sure we write NULLFSINO to the sb_gquotino field as a quota
+ * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
+ * bit is set.
+ *
+ * Note that we don't need to handle the sb_uquotino or sb_pquotino here
+ * as they do not require any translation. Hence the main sb field loop
+ * will write them appropriately from the in-core superblock.
*/
if ((*fields & XFS_SB_GQUOTINO) &&
(from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
else if ((*fields & XFS_SB_PQUOTINO) &&
(from->sb_qflags & XFS_PQUOTA_ACCT))
to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
+ else {
+ /*
+ * We can't rely on just the fields being logged to tell us
+ * that it is safe to write NULLFSINO - we should only do that
+ * if quotas are not actually enabled. Hence only write
+ * NULLFSINO if both in-core quota inodes are NULL.
+ */
+ if (from->sb_gquotino == NULLFSINO &&
+ from->sb_pquotino == NULLFSINO)
+ to->sb_gquotino = cpu_to_be64(NULLFSINO);
+ }
*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
}