diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/inode.c | 2 | ||||
-rw-r--r-- | fs/cifs/cifs_dfs_ref.c | 2 | ||||
-rw-r--r-- | fs/cifs/cifsencrypt.c | 2 | ||||
-rw-r--r-- | fs/cifs/connect.c | 3 | ||||
-rw-r--r-- | fs/direct-io.c | 2 | ||||
-rw-r--r-- | fs/efivarfs/file.c | 70 | ||||
-rw-r--r-- | fs/efivarfs/inode.c | 30 | ||||
-rw-r--r-- | fs/efivarfs/internal.h | 3 | ||||
-rw-r--r-- | fs/efivarfs/super.c | 16 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 7 | ||||
-rw-r--r-- | fs/ext4/crypto.c | 56 | ||||
-rw-r--r-- | fs/ext4/dir.c | 13 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
-rw-r--r-- | fs/ext4/extents.c | 4 | ||||
-rw-r--r-- | fs/ext4/file.c | 9 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 6 | ||||
-rw-r--r-- | fs/ext4/inode.c | 72 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 2 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 2 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 15 | ||||
-rw-r--r-- | fs/ext4/namei.c | 26 | ||||
-rw-r--r-- | fs/ext4/resize.c | 2 | ||||
-rw-r--r-- | fs/fs-writeback.c | 15 | ||||
-rw-r--r-- | fs/inode.c | 6 | ||||
-rw-r--r-- | fs/nfs/blocklayout/extent_tree.c | 10 | ||||
-rw-r--r-- | fs/nfs/nfs42proc.c | 119 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 4 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 55 | ||||
-rw-r--r-- | fs/notify/mark.c | 53 |
29 files changed, 465 insertions, 142 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 151b7c71b868..d96f5cf38a2d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7986,6 +7986,7 @@ static void btrfs_endio_direct_read(struct bio *bio) kfree(dip); + dio_bio->bi_error = bio->bi_error; dio_end_io(dio_bio, bio->bi_error); if (io_bio->end_io) @@ -8040,6 +8041,7 @@ static void btrfs_endio_direct_write(struct bio *bio) kfree(dip); + dio_bio->bi_error = bio->bi_error; dio_end_io(dio_bio, bio->bi_error); bio_put(bio); } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 7dc886c9a78f..e956cba94338 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -175,7 +175,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, * string to the length of the original string to allow for worst case. */ md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN; - mountdata = kzalloc(md_len + 1, GFP_KERNEL); + mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL); if (mountdata == NULL) { rc = -ENOMEM; goto compose_mount_options_err; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index afa09fce8151..e682b36a210f 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); if (!ses->auth_key.response) { - rc = ENOMEM; + rc = -ENOMEM; ses->auth_key.len = 0; goto setup_ntlmv2_rsp_ret; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4fbd92d2e113..a763cd3d9e7c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2999,8 +2999,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) if (ses_init_buf) { ses_init_buf->trailer.session_req.called_len = 32; - if (server->server_RFC1001_name && - server->server_RFC1001_name[0] != 0) + if (server->server_RFC1001_name[0] != 0) rfc1002mangle(ses_init_buf->trailer. session_req.called_name, server->server_RFC1001_name, diff --git a/fs/direct-io.c b/fs/direct-io.c index 1b2f7ffc8b84..d6a9012d42ad 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) dio->io_error = -EIO; if (dio->is_async && dio->rw == READ && dio->should_dirty) { - bio_check_pages_dirty(bio); /* transfers ownership */ err = bio->bi_error; + bio_check_pages_dirty(bio); /* transfers ownership */ } else { bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index c424e4813ec8..d48e0d261d78 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -10,6 +10,7 @@ #include <linux/efi.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/mount.h> #include "internal.h" @@ -103,9 +104,78 @@ out_free: return size; } +static int +efivarfs_ioc_getxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int i_flags; + unsigned int flags = 0; + + i_flags = inode->i_flags; + if (i_flags & S_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; + + if (copy_to_user(arg, &flags, sizeof(flags))) + return -EFAULT; + return 0; +} + +static int +efivarfs_ioc_setxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int flags; + unsigned int i_flags = 0; + int error; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~FS_IMMUTABLE_FL) + return -EOPNOTSUPP; + + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + if (flags & FS_IMMUTABLE_FL) + i_flags |= S_IMMUTABLE; + + + error = mnt_want_write_file(file); + if (error) + return error; + + inode_lock(inode); + inode_set_flags(inode, i_flags, S_IMMUTABLE); + inode_unlock(inode); + + mnt_drop_write_file(file); + + return 0; +} + +long +efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p) +{ + void __user *arg = (void __user *)p; + + switch (cmd) { + case FS_IOC_GETFLAGS: + return efivarfs_ioc_getxflags(file, arg); + case FS_IOC_SETFLAGS: + return efivarfs_ioc_setxflags(file, arg); + } + + return -ENOTTY; +} + const struct file_operations efivarfs_file_operations = { .open = simple_open, .read = efivarfs_file_read, .write = efivarfs_file_write, .llseek = no_llseek, + .unlocked_ioctl = efivarfs_file_ioctl, }; diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 3381b9da9ee6..e2ab6d0497f2 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -15,7 +15,8 @@ #include "internal.h" struct inode *efivarfs_get_inode(struct super_block *sb, - const struct inode *dir, int mode, dev_t dev) + const struct inode *dir, int mode, + dev_t dev, bool is_removable) { struct inode *inode = new_inode(sb); @@ -23,6 +24,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb, inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_flags = is_removable ? 0 : S_IMMUTABLE; switch (mode & S_IFMT) { case S_IFREG: inode->i_fop = &efivarfs_file_operations; @@ -102,22 +104,17 @@ static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid) static int efivarfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct inode *inode; + struct inode *inode = NULL; struct efivar_entry *var; int namelen, i = 0, err = 0; + bool is_removable = false; if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len)) return -EINVAL; - inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0); - if (!inode) - return -ENOMEM; - var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL); - if (!var) { - err = -ENOMEM; - goto out; - } + if (!var) + return -ENOMEM; /* length of the variable name itself: remove GUID and separator */ namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1; @@ -125,6 +122,16 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + if (efivar_variable_is_removable(var->var.VendorGuid, + dentry->d_name.name, namelen)) + is_removable = true; + + inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0, is_removable); + if (!inode) { + err = -ENOMEM; + goto out; + } + for (i = 0; i < namelen; i++) var->var.VariableName[i] = dentry->d_name.name[i]; @@ -138,7 +145,8 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, out: if (err) { kfree(var); - iput(inode); + if (inode) + iput(inode); } return err; } diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index b5ff16addb7c..b4505188e799 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -15,7 +15,8 @@ extern const struct file_operations efivarfs_file_operations; extern const struct inode_operations efivarfs_dir_inode_operations; extern bool efivarfs_valid_name(const char *str, int len); extern struct inode *efivarfs_get_inode(struct super_block *sb, - const struct inode *dir, int mode, dev_t dev); + const struct inode *dir, int mode, dev_t dev, + bool is_removable); extern struct list_head efivarfs_list; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index b8a564f29107..dd029d13ea61 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -118,8 +118,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, struct dentry *dentry, *root = sb->s_root; unsigned long size = 0; char *name; - int len, i; + int len; int err = -ENOMEM; + bool is_removable = false; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) @@ -128,15 +129,17 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, memcpy(entry->var.VariableName, name16, name_size); memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t)); - len = ucs2_strlen(entry->var.VariableName); + len = ucs2_utf8size(entry->var.VariableName); /* name, plus '-', plus GUID, plus NUL*/ name = kmalloc(len + 1 + EFI_VARIABLE_GUID_LEN + 1, GFP_KERNEL); if (!name) goto fail; - for (i = 0; i < len; i++) - name[i] = entry->var.VariableName[i] & 0xFF; + ucs2_as_utf8(name, entry->var.VariableName, len); + + if (efivar_variable_is_removable(entry->var.VendorGuid, name, len)) + is_removable = true; name[len] = '-'; @@ -144,7 +147,8 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; - inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0); + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0, + is_removable); if (!inode) goto fail_name; @@ -200,7 +204,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; - inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0); + inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true); if (!inode) return -ENOMEM; inode->i_op = &efivarfs_dir_inode_operations; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ec0668a60678..fe1f50fe764f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) + if (err) { + ext4_error(sb, "Failed to init block bitmap for group " + "%u: %d", block_group, err); goto out; + } goto verify; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index c8021208a7eb..38f7562489bb 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -467,3 +467,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) return size; return 0; } + +/* + * Validate dentries for encrypted directories to make sure we aren't + * potentially caching stale data after a key has been added or + * removed. + */ +static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct inode *dir = d_inode(dentry->d_parent); + struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info; + int dir_has_key, cached_with_key; + + if (!ext4_encrypted_inode(dir)) + return 0; + + if (ci && ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD)))) + ci = NULL; + + /* this should eventually be an flag in d_flags */ + cached_with_key = dentry->d_fsdata != NULL; + dir_has_key = (ci != NULL); + + /* + * If the dentry was cached without the key, and it is a + * negative dentry, it might be a valid name. We can't check + * if the key has since been made available due to locking + * reasons, so we fail the validation so ext4_lookup() can do + * this check. + * + * We also fail the validation if the dentry was created with + * the key present, but we no longer have the key, or vice versa. + */ + if ((!cached_with_key && d_is_negative(dentry)) || + (!cached_with_key && dir_has_key) || + (cached_with_key && !dir_has_key)) { +#if 0 /* Revalidation debug */ + char buf[80]; + char *cp = simple_dname(dentry, buf, sizeof(buf)); + + if (IS_ERR(cp)) + cp = (char *) "???"; + pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata, + cached_with_key, d_is_negative(dentry), + dir_has_key); +#endif + return 0; + } + return 1; +} + +const struct dentry_operations ext4_encrypted_d_ops = { + .d_revalidate = ext4_d_revalidate, +}; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1d1bca74f844..33f5e2a50cf8 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) int dir_has_error = 0; struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; + if (ext4_encrypted_inode(inode)) { + err = ext4_get_encryption_info(inode); + if (err && err != -ENOKEY) + return err; + } + if (is_dx_dir(inode)) { err = ext4_dx_readdir(file, ctx); if (err != ERR_BAD_DX_DIR) { @@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) index, 1); file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; bh = ext4_bread(NULL, inode, map.m_lblk, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + bh = NULL; + goto errout; + } } if (!bh) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0662b285dc8a..157b458a69d4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2302,6 +2302,7 @@ struct page *ext4_encrypt(struct inode *inode, int ext4_decrypt(struct page *page); int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, ext4_lblk_t len); +extern const struct dentry_operations ext4_encrypted_d_ops; #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_init_crypto(void); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0ffabaf90aa5..3753ceb0b0dd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3928,7 +3928,7 @@ static int convert_initialized_extent(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path **ppath, int flags, - unsigned int allocated, ext4_fsblk_t newblock) + unsigned int allocated) { struct ext4_ext_path *path = *ppath; struct ext4_extent *ex; @@ -4347,7 +4347,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { allocated = convert_initialized_extent( handle, inode, map, &path, - flags, allocated, newblock); + flags, allocated); goto out2; } else if (!ext4_ext_is_unwritten(ex)) goto out; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1126436dada1..474f1a4d2ca8 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -350,6 +350,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) struct super_block *sb = inode->i_sb; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct vfsmount *mnt = filp->f_path.mnt; + struct inode *dir = filp->f_path.dentry->d_parent->d_inode; struct path path; char buf[64], *cp; int ret; @@ -393,6 +394,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ext4_encryption_info(inode) == NULL) return -ENOKEY; } + if (ext4_encrypted_inode(dir) && + !ext4_is_child_context_consistent_with_parent(dir, inode)) { + ext4_warning(inode->i_sb, + "Inconsistent encryption contexts: %lu/%lu\n", + (unsigned long) dir->i_ino, + (unsigned long) inode->i_ino); + return -EPERM; + } /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3fcfd50a2e8a..acc0ad56bf2f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) + if (err) { + ext4_error(sb, "Failed to init inode bitmap for group " + "%u: %d", block_group, err); goto out; + } return bh; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 83bc8bfb3bea..9cc57c3b4661 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -686,6 +686,34 @@ out_sem: return retval; } +/* + * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages + * we have to be careful as someone else may be manipulating b_state as well. + */ +static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) +{ + unsigned long old_state; + unsigned long new_state; + + flags &= EXT4_MAP_FLAGS; + + /* Dummy buffer_head? Set non-atomically. */ + if (!bh->b_page) { + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; + return; + } + /* + * Someone else may be modifying b_state. Be careful! This is ugly but + * once we get rid of using bh as a container for mapping information + * to pass to / from get_block functions, this can go away. + */ + do { + old_state = READ_ONCE(bh->b_state); + new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; + } while (unlikely( + cmpxchg(&bh->b_state, old_state, new_state) != old_state)); +} + /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 @@ -722,7 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, ext4_io_end_t *io_end = ext4_inode_aio(inode); map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) set_buffer_defer_completion(bh); bh->b_size = inode->i_sb->s_blocksize * map.m_len; @@ -1685,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return ret; map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (buffer_unwritten(bh)) { /* A delayed write to unwritten bh should be marked @@ -3253,29 +3281,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * case, we allocate an io_end structure to hook to the iocb. */ iocb->private = NULL; - ext4_inode_aio_set(inode, NULL); - if (!is_sync_kiocb(iocb)) { - io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) { - ret = -ENOMEM; - goto retake_lock; - } - /* - * Grab reference for DIO. Will be dropped in ext4_end_io_dio() - */ - iocb->private = ext4_get_io_end(io_end); - /* - * we save the io structure for current async direct - * IO, so that later ext4_map_blocks() could flag the - * io structure whether there is a unwritten extents - * needs to be converted when IO is completed. - */ - ext4_inode_aio_set(inode, io_end); - } - if (overwrite) { get_block_func = ext4_get_block_overwrite; } else { + ext4_inode_aio_set(inode, NULL); + if (!is_sync_kiocb(iocb)) { + io_end = ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) { + ret = -ENOMEM; + goto retake_lock; + } + /* + * Grab reference for DIO. Will be dropped in + * ext4_end_io_dio() + */ + iocb->private = ext4_get_io_end(io_end); + /* + * we save the io structure for current async direct + * IO, so that later ext4_map_blocks() could flag the + * io structure whether there is a unwritten extents + * needs to be converted when IO is completed. + */ + ext4_inode_aio_set(inode, io_end); + } get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 0f6c36922c24..a99b010e2194 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -208,7 +208,7 @@ static int ext4_ioctl_setflags(struct inode *inode, { struct ext4_inode_info *ei = EXT4_I(inode); handle_t *handle = NULL; - int err = EPERM, migrate = 0; + int err = -EPERM, migrate = 0; struct ext4_iloc iloc; unsigned int oldflags, mask, i; unsigned int jflag; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 61eaf74dca37..4424b7bf8ac6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2285,7 +2285,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) if (group == 0) seq_puts(seq, "#group: free frags first [" " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " - " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]"); + " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + sizeof(struct ext4_group_info); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index fb6f11709ae6..e032a0423e35 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int tmp_data_size, data_size, replaced_size; - int err2, jblocks, retries = 0; + int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; + struct buffer_head *bh = NULL; /* * It needs twice the amount of ordinary journal buffers because @@ -380,8 +381,16 @@ data_copy: } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - *err = __block_write_begin(pagep[0], from, replaced_size, - ext4_get_block); + if (!page_has_buffers(pagep[0])) + create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); + bh = page_buffers(pagep[0]); + for (i = 0; i < data_offset_in_page; i++) + bh = bh->b_this_page; + for (i = 0; i < block_len_in_page; i++) { + *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); + if (*err < 0) + break; + } if (!*err) *err = block_commit_write(pagep[0], from, from + replaced_size); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 06574dd77614..48e4b8907826 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct ext4_dir_entry_2 *de; struct buffer_head *bh; + if (ext4_encrypted_inode(dir)) { + int res = ext4_get_encryption_info(dir); + + /* + * This should be a properly defined flag for + * dentry->d_flags when we uplift this to the VFS. + * d_fsdata is set to (void *) 1 if if the dentry is + * created while the directory was encrypted and we + * don't have access to the key. + */ + dentry->d_fsdata = NULL; + if (ext4_encryption_info(dir)) + dentry->d_fsdata = (void *) 1; + d_set_d_op(dentry, &ext4_encrypted_d_ops); + if (res && res != -ENOKEY) + return ERR_PTR(res); + } + if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -1585,11 +1603,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi return ERR_PTR(-EFSCORRUPTED); } if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && - (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) && + (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !ext4_is_child_context_consistent_with_parent(dir, inode)) { + int nokey = ext4_encrypted_inode(inode) && + !ext4_encryption_info(inode); + iput(inode); + if (nokey) + return ERR_PTR(-ENOKEY); ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu\n", (unsigned long) dir->i_ino, diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ad62d7acc315..34038e3598d5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) goto out2; flex_gd->count = flexbg_size; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6915c950e6e8..1f76d8950a57 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -317,6 +317,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) struct inode_switch_wbs_context *isw = container_of(work, struct inode_switch_wbs_context, work); struct inode *inode = isw->inode; + struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; struct bdi_writeback *old_wb = inode->i_wb; struct bdi_writeback *new_wb = isw->new_wb; @@ -423,6 +424,7 @@ skip_switch: wb_put(new_wb); iput(inode); + deactivate_super(sb); kfree(isw); } @@ -469,11 +471,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); + if (inode->i_state & (I_WB_SWITCH | I_FREEING) || - inode_to_wb(inode) == isw->new_wb) { - spin_unlock(&inode->i_lock); - goto out_free; - } + inode_to_wb(inode) == isw->new_wb) + goto out_unlock; + + if (!atomic_inc_not_zero(&inode->i_sb->s_active)) + goto out_unlock; + inode->i_state |= I_WB_SWITCH; spin_unlock(&inode->i_lock); @@ -489,6 +494,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); return; +out_unlock: + spin_unlock(&inode->i_lock); out_free: if (isw->new_wb) wb_put(isw->new_wb); diff --git a/fs/inode.c b/fs/inode.c index 9f62db3bcc3e..69b8b526c194 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_rdev = 0; inode->dirtied_when = 0; +#ifdef CONFIG_CGROUP_WRITEBACK + inode->i_wb_frn_winner = 0; + inode->i_wb_frn_avg_time = 0; + inode->i_wb_frn_history = 0; +#endif + if (security_inode_alloc(inode)) goto out; spin_lock_init(&inode->i_lock); diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index c59a59c37f3d..35ab51c04814 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -476,6 +476,7 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, for (i = 0; i < nr_pages; i++) put_page(arg->layoutupdate_pages[i]); + vfree(arg->start_p); kfree(arg->layoutupdate_pages); } else { put_page(arg->layoutupdate_page); @@ -559,10 +560,15 @@ retry: if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { void *p = start_p, *end = p + arg->layoutupdate_len; + struct page *page = NULL; int i = 0; - for ( ; p < end; p += PAGE_SIZE) - arg->layoutupdate_pages[i++] = vmalloc_to_page(p); + arg->start_p = start_p; + for ( ; p < end; p += PAGE_SIZE) { + page = vmalloc_to_page(p); + arg->layoutupdate_pages[i++] = page; + get_page(page); + } } dprintk("%s found %zu ranges\n", __func__, count); diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index bd25dc7077f7..dff83460e5a6 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -16,29 +16,8 @@ #define NFSDBG_FACILITY NFSDBG_PROC -static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, - fmode_t fmode) -{ - struct nfs_open_context *open; - struct nfs_lock_context *lock; - int ret; - - open = get_nfs_open_context(nfs_file_open_context(file)); - lock = nfs_get_lock_context(open); - if (IS_ERR(lock)) { - put_nfs_open_context(open); - return PTR_ERR(lock); - } - - ret = nfs4_set_rw_stateid(dst, open, lock, fmode); - - nfs_put_lock_context(lock); - put_nfs_open_context(open); - return ret; -} - static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, - loff_t offset, loff_t len) + struct nfs_lock_context *lock, loff_t offset, loff_t len) { struct inode *inode = file_inode(filep); struct nfs_server *server = NFS_SERVER(inode); @@ -56,7 +35,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, msg->rpc_argp = &args; msg->rpc_resp = &res; - status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); + status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context, + lock, FMODE_WRITE); if (status) return status; @@ -78,15 +58,26 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, { struct nfs_server *server = NFS_SERVER(file_inode(filep)); struct nfs4_exception exception = { }; + struct nfs_lock_context *lock; int err; + lock = nfs_get_lock_context(nfs_file_open_context(filep)); + if (IS_ERR(lock)) + return PTR_ERR(lock); + + exception.inode = file_inode(filep); + exception.state = lock->open_context->state; + do { - err = _nfs42_proc_fallocate(msg, filep, offset, len); - if (err == -ENOTSUPP) - return -EOPNOTSUPP; + err = _nfs42_proc_fallocate(msg, filep, lock, offset, len); + if (err == -ENOTSUPP) { + err = -EOPNOTSUPP; + break; + } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); + nfs_put_lock_context(lock); return err; } @@ -135,7 +126,8 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return err; } -static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) +static loff_t _nfs42_proc_llseek(struct file *filep, + struct nfs_lock_context *lock, loff_t offset, int whence) { struct inode *inode = file_inode(filep); struct nfs42_seek_args args = { @@ -156,7 +148,8 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) if (!nfs_server_capable(inode, NFS_CAP_SEEK)) return -ENOTSUPP; - status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); + status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context, + lock, FMODE_READ); if (status) return status; @@ -175,17 +168,28 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct nfs_server *server = NFS_SERVER(file_inode(filep)); struct nfs4_exception exception = { }; + struct nfs_lock_context *lock; loff_t err; + lock = nfs_get_lock_context(nfs_file_open_context(filep)); + if (IS_ERR(lock)) + return PTR_ERR(lock); + + exception.inode = file_inode(filep); + exception.state = lock->open_context->state; + do { - err = _nfs42_proc_llseek(filep, offset, whence); + err = _nfs42_proc_llseek(filep, lock, offset, whence); if (err >= 0) break; - if (err == -ENOTSUPP) - return -EOPNOTSUPP; + if (err == -ENOTSUPP) { + err = -EOPNOTSUPP; + break; + } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); + nfs_put_lock_context(lock); return err; } @@ -298,8 +302,9 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, } static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, - struct file *dst_f, loff_t src_offset, - loff_t dst_offset, loff_t count) + struct file *dst_f, struct nfs_lock_context *src_lock, + struct nfs_lock_context *dst_lock, loff_t src_offset, + loff_t dst_offset, loff_t count) { struct inode *src_inode = file_inode(src_f); struct inode *dst_inode = file_inode(dst_f); @@ -320,11 +325,13 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, msg->rpc_argp = &args; msg->rpc_resp = &res; - status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); + status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, + src_lock, FMODE_READ); if (status) return status; - status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); + status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, + dst_lock, FMODE_WRITE); if (status) return status; @@ -349,22 +356,48 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f, }; struct inode *inode = file_inode(src_f); struct nfs_server *server = NFS_SERVER(file_inode(src_f)); - struct nfs4_exception exception = { }; - int err; + struct nfs_lock_context *src_lock; + struct nfs_lock_context *dst_lock; + struct nfs4_exception src_exception = { }; + struct nfs4_exception dst_exception = { }; + int err, err2; if (!nfs_server_capable(inode, NFS_CAP_CLONE)) return -EOPNOTSUPP; + src_lock = nfs_get_lock_context(nfs_file_open_context(src_f)); + if (IS_ERR(src_lock)) + return PTR_ERR(src_lock); + + src_exception.inode = file_inode(src_f); + src_exception.state = src_lock->open_context->state; + + dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f)); + if (IS_ERR(dst_lock)) { + err = PTR_ERR(dst_lock); + goto out_put_src_lock; + } + + dst_exception.inode = file_inode(dst_f); + dst_exception.state = dst_lock->open_context->state; + do { - err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, - dst_offset, count); + err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock, + src_offset, dst_offset, count); if (err == -ENOTSUPP || err == -EOPNOTSUPP) { NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + break; } - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); - return err; + err2 = nfs4_handle_exception(server, err, &src_exception); + err = nfs4_handle_exception(server, err, &dst_exception); + if (!err) + err = err2; + } while (src_exception.retry || dst_exception.retry); + nfs_put_lock_context(dst_lock); +out_put_src_lock: + nfs_put_lock_context(src_lock); + return err; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bfc33ad0563..14881594dd07 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2466,9 +2466,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, dentry = d_add_unique(dentry, igrab(state->inode)); if (dentry == NULL) { dentry = opendata->dentry; - } else if (dentry != ctx->dentry) { + } else { dput(ctx->dentry); - ctx->dentry = dget(dentry); + ctx->dentry = dentry; } nfs_set_verifier(dentry, nfs_save_change_attribute(d_inode(opendata->dir))); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 482b6e94bb37..2fa483e6dbe2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -252,6 +252,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +/* + * Mark a pnfs_layout_hdr and all associated layout segments as invalid + * + * In order to continue using the pnfs_layout_hdr, a full recovery + * is required. + * Note that caller must hold inode->i_lock. + */ +static int +pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, + struct list_head *lseg_list) +{ + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range); +} + static int pnfs_iomode_to_fail_bit(u32 iomode) { @@ -554,9 +575,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) spin_lock(&nfsi->vfs_inode.i_lock); lo = nfsi->layout; if (lo) { - lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); pnfs_get_layout_hdr(lo); + pnfs_mark_layout_stateid_invalid(lo, &tmp_list); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); spin_unlock(&nfsi->vfs_inode.i_lock); @@ -617,11 +637,6 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, { struct pnfs_layout_hdr *lo; struct inode *inode; - struct pnfs_layout_range range = { - .iomode = IOMODE_ANY, - .offset = 0, - .length = NFS4_MAX_UINT64, - }; LIST_HEAD(lseg_list); int ret = 0; @@ -636,11 +651,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, spin_lock(&inode->i_lock); list_del_init(&lo->plh_bulk_destroy); - lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - if (is_bulk_recall) - set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); - if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range)) + if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { + if (is_bulk_recall) + set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); ret = -EAGAIN; + } spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&lseg_list); /* Free all lsegs that are attached to commit buckets */ @@ -1738,8 +1753,19 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) if (lo->plh_return_iomode != 0) iomode = IOMODE_ANY; lo->plh_return_iomode = iomode; + set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); } +/** + * pnfs_mark_matching_lsegs_return - Free or return matching layout segments + * @lo: pointer to layout header + * @tmp_list: list header to be used with pnfs_free_lseg_list() + * @return_range: describe layout segment ranges to be returned + * + * This function is mainly intended for use by layoutrecall. It attempts + * to free the layout segment immediately, or else to mark it for return + * as soon as its reference count drops to zero. + */ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, @@ -1762,12 +1788,11 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); + if (mark_lseg_invalid(lseg, tmp_list)) + continue; + remaining++; set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); pnfs_set_plh_return_iomode(lo, return_range->iomode); - if (!mark_lseg_invalid(lseg, tmp_list)) - remaining++; - set_bit(NFS_LAYOUT_RETURN_REQUESTED, - &lo->plh_flags); } return remaining; } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index cfcbf114676e..7115c5d7d373 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -91,7 +91,14 @@ #include <linux/fsnotify_backend.h> #include "fsnotify.h" +#define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ + struct srcu_struct fsnotify_mark_srcu; +static DEFINE_SPINLOCK(destroy_lock); +static LIST_HEAD(destroy_list); + +static void fsnotify_mark_destroy(struct work_struct *work); +static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy); void fsnotify_get_mark(struct fsnotify_mark *mark) { @@ -165,19 +172,10 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) atomic_dec(&group->num_marks); } -static void -fsnotify_mark_free_rcu(struct rcu_head *rcu) -{ - struct fsnotify_mark *mark; - - mark = container_of(rcu, struct fsnotify_mark, g_rcu); - fsnotify_put_mark(mark); -} - /* - * Free fsnotify mark. The freeing is actually happening from a call_srcu - * callback. Caller must have a reference to the mark or be protected by - * fsnotify_mark_srcu. + * Free fsnotify mark. The freeing is actually happening from a kthread which + * first waits for srcu period end. Caller must have a reference to the mark + * or be protected by fsnotify_mark_srcu. */ void fsnotify_free_mark(struct fsnotify_mark *mark) { @@ -192,7 +190,11 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; spin_unlock(&mark->lock); - call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); /* * Some groups like to know that marks are being freed. This is a @@ -388,7 +390,12 @@ err: spin_unlock(&mark->lock); - call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); + return ret; } @@ -491,3 +498,21 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, atomic_set(&mark->refcnt, 1); mark->free_mark = free_mark; } + +static void fsnotify_mark_destroy(struct work_struct *work) +{ + struct fsnotify_mark *mark, *next; + struct list_head private_destroy_list; + + spin_lock(&destroy_lock); + /* exchange the list head */ + list_replace_init(&destroy_list, &private_destroy_list); + spin_unlock(&destroy_lock); + + synchronize_srcu(&fsnotify_mark_srcu); + + list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { + list_del_init(&mark->g_list); + fsnotify_put_mark(mark); + } +} |