summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c3
-rw-r--r--fs/btrfs/block-group.c40
-rw-r--r--fs/btrfs/block-group.h4
-rw-r--r--fs/btrfs/compression.c8
-rw-r--r--fs/btrfs/disk-io.c5
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/inode.c9
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/cachefiles/namei.c33
-rw-r--r--fs/cachefiles/xattr.c2
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/fscache/Kconfig3
-rw-r--r--fs/fscache/cache.c2
-rw-r--r--fs/fscache/cookie.c4
-rw-r--r--fs/fscache/internal.h4
-rw-r--r--fs/fscache/io.c5
-rw-r--r--fs/io-wq.h1
-rw-r--r--fs/io_uring.c617
-rw-r--r--fs/namei.c22
-rw-r--r--fs/nfs/Kconfig4
-rw-r--r--fs/nfs/dir.c19
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/nfs42xattr.c2
-rw-r--r--fs/nfs/nfs4file.c6
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfs/unlink.c1
-rw-r--r--fs/nfsd/filecache.c25
-rw-r--r--fs/nfsd/nfs2acl.c24
-rw-r--r--fs/stat.c19
-rw-r--r--fs/sysfs/file.c13
32 files changed, 352 insertions, 543 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 6bcf1475511b..4763132ca57e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -616,8 +616,7 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
_debug("write discard %x @%llx [%llx]", len, start, i_size);
/* The dirty region was entirely beyond the EOF. */
- fscache_clear_page_bits(afs_vnode_cache(vnode),
- mapping, start, len, caching);
+ fscache_clear_page_bits(mapping, start, len, caching);
afs_pages_written_back(vnode, start, len);
ret = 0;
}
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c22d287e020b..0dd6de994199 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2503,12 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
return ERR_PTR(ret);
}
- /*
- * New block group is likely to be used soon. Try to activate it now.
- * Failure is OK for now.
- */
- btrfs_zone_activate(cache);
-
ret = exclude_super_stripes(cache);
if (ret) {
/* We may have excluded something, so call this just in case */
@@ -2946,7 +2940,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
struct btrfs_path *path = NULL;
LIST_HEAD(dirty);
struct list_head *io = &cur_trans->io_bgs;
- int num_started = 0;
int loops = 0;
spin_lock(&cur_trans->dirty_bgs_lock);
@@ -3012,7 +3005,6 @@ again:
cache->io_ctl.inode = NULL;
ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
- num_started++;
should_put = 0;
/*
@@ -3113,7 +3105,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
int should_put;
struct btrfs_path *path;
struct list_head *io = &cur_trans->io_bgs;
- int num_started = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3171,7 +3162,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
cache->io_ctl.inode = NULL;
ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
- num_started++;
should_put = 0;
list_add_tail(&cache->io_list, io);
} else {
@@ -3455,7 +3445,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
-static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
{
struct btrfs_block_group *bg;
int ret;
@@ -3542,7 +3532,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
out:
btrfs_trans_release_chunk_metadata(trans);
- return ret;
+ if (ret)
+ return ERR_PTR(ret);
+
+ btrfs_get_block_group(bg);
+ return bg;
}
/*
@@ -3657,10 +3651,17 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
+ struct btrfs_block_group *ret_bg;
bool wait_for_alloc = false;
bool should_alloc = false;
+ bool from_extent_allocation = false;
int ret = 0;
+ if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) {
+ from_extent_allocation = true;
+ force = CHUNK_ALLOC_FORCE;
+ }
+
/* Don't re-enter if we're already allocating a chunk */
if (trans->allocating_chunk)
return -ENOSPC;
@@ -3750,9 +3751,22 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
force_metadata_allocation(fs_info);
}
- ret = do_chunk_alloc(trans, flags);
+ ret_bg = do_chunk_alloc(trans, flags);
trans->allocating_chunk = false;
+ if (IS_ERR(ret_bg)) {
+ ret = PTR_ERR(ret_bg);
+ } else if (from_extent_allocation) {
+ /*
+ * New block group is likely to be used soon. Try to activate
+ * it now. Failure is OK for now.
+ */
+ btrfs_zone_activate(ret_bg);
+ }
+
+ if (!ret)
+ btrfs_put_block_group(ret_bg);
+
spin_lock(&space_info->lock);
if (ret < 0) {
if (ret == -ENOSPC)
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 93aabc68bb6a..e8308f2ad07d 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -35,11 +35,15 @@ enum btrfs_discard_state {
* the FS with empty chunks
*
* CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from
+ * find_free_extent() that also activaes the zone
*/
enum btrfs_chunk_alloc_enum {
CHUNK_ALLOC_NO_FORCE,
CHUNK_ALLOC_LIMITED,
CHUNK_ALLOC_FORCE,
+ CHUNK_ALLOC_FORCE_FOR_EXTENT,
};
struct btrfs_caching_control {
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index be476f094300..19bf36d8ffea 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -537,6 +537,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
+ if (blkcg_css)
+ kthread_associate_blkcg(blkcg_css);
+
while (cur_disk_bytenr < disk_start + compressed_len) {
u64 offset = cur_disk_bytenr - disk_start;
unsigned int index = offset >> PAGE_SHIFT;
@@ -555,6 +558,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
bio = NULL;
goto finish_cb;
}
+ if (blkcg_css)
+ bio->bi_opf |= REQ_CGROUP_PUNT;
}
/*
* We should never reach next_stripe_start start as we will
@@ -612,6 +617,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
return 0;
finish_cb:
+ if (blkcg_css)
+ kthread_associate_blkcg(NULL);
+
if (bio) {
bio->bi_status = ret;
bio_endio(bio);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b30309f187cf..126f244cdf88 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1850,9 +1850,10 @@ again:
ret = btrfs_insert_fs_root(fs_info, root);
if (ret) {
- btrfs_put_root(root);
- if (ret == -EEXIST)
+ if (ret == -EEXIST) {
+ btrfs_put_root(root);
goto again;
+ }
goto fail;
}
return root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f477035a2ac2..6aa92f84f465 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4082,7 +4082,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
}
ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
- CHUNK_ALLOC_FORCE);
+ CHUNK_ALLOC_FORCE_FOR_EXTENT);
/* Do not bail out on ENOSPC since we can do more. */
if (ret == -ENOSPC)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17d5557f98ec..5082b9c70f8c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2016,8 +2016,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
* to use run_delalloc_nocow() here, like for regular
* preallocated inodes.
*/
- ASSERT(!zoned ||
- (zoned && btrfs_is_data_reloc_root(inode->root)));
+ ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
} else if (!inode_can_compress(inode) ||
@@ -7444,6 +7443,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
u64 block_start, orig_start, orig_block_len, ram_bytes;
bool can_nocow = false;
bool space_reserved = false;
+ u64 prev_len;
int ret = 0;
/*
@@ -7471,6 +7471,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
can_nocow = true;
}
+ prev_len = len;
if (can_nocow) {
struct extent_map *em2;
@@ -7500,8 +7501,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
goto out;
}
} else {
- const u64 prev_len = len;
-
/* Our caller expects us to free the input extent map. */
free_extent_map(em);
*map = NULL;
@@ -7532,7 +7531,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
* We have created our ordered extent, so we can now release our reservation
* for an outstanding extent.
*/
- btrfs_delalloc_release_extents(BTRFS_I(inode), len);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len);
/*
* Need to update the i_size under the extent lock so buffered
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f46e71061942..be6c24577dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5456,8 +5456,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_fs_info(fs_info, argp);
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(fs_info, argp);
- case BTRFS_IOC_BALANCE:
- return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_TREE_SEARCH:
return btrfs_ioctl_tree_search(inode, argp);
case BTRFS_IOC_TREE_SEARCH_V2:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2cfbc74a3b4e..a8cc736731fd 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4430,10 +4430,12 @@ static int balance_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
int ret = 0;
+ sb_start_write(fs_info->sb);
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl)
ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
mutex_unlock(&fs_info->balance_mutex);
+ sb_end_write(fs_info->sb);
return ret;
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f256c8aff7bb..ca9f3e4ec4b3 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -57,6 +57,16 @@ static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
trace_cachefiles_mark_inactive(object, inode);
}
+static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object,
+ struct dentry *dentry)
+{
+ struct inode *inode = d_backing_inode(dentry);
+
+ inode_lock(inode);
+ __cachefiles_unmark_inode_in_use(object, dentry);
+ inode_unlock(inode);
+}
+
/*
* Unmark a backing inode and tell cachefilesd that there's something that can
* be culled.
@@ -68,9 +78,7 @@ void cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
struct inode *inode = file_inode(file);
if (inode) {
- inode_lock(inode);
- __cachefiles_unmark_inode_in_use(object, file->f_path.dentry);
- inode_unlock(inode);
+ cachefiles_do_unmark_inode_in_use(object, file->f_path.dentry);
if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
atomic_long_add(inode->i_blocks, &cache->b_released);
@@ -484,7 +492,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
object, d_backing_inode(path.dentry), ret,
cachefiles_trace_trunc_error);
file = ERR_PTR(ret);
- goto out_dput;
+ goto out_unuse;
}
}
@@ -494,15 +502,20 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry),
PTR_ERR(file),
cachefiles_trace_open_error);
- goto out_dput;
+ goto out_unuse;
}
if (unlikely(!file->f_op->read_iter) ||
unlikely(!file->f_op->write_iter)) {
fput(file);
pr_notice("Cache does not support read_iter and write_iter\n");
file = ERR_PTR(-EINVAL);
+ goto out_unuse;
}
+ goto out_dput;
+
+out_unuse:
+ cachefiles_do_unmark_inode_in_use(object, path.dentry);
out_dput:
dput(path.dentry);
out:
@@ -590,14 +603,16 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
check_failed:
fscache_cookie_lookup_negative(object->cookie);
cachefiles_unmark_inode_in_use(object, file);
- if (ret == -ESTALE) {
- fput(file);
- dput(dentry);
+ fput(file);
+ dput(dentry);
+ if (ret == -ESTALE)
return cachefiles_create_file(object);
- }
+ return false;
+
error_fput:
fput(file);
error:
+ cachefiles_do_unmark_inode_in_use(object, dentry);
dput(dentry);
return false;
}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 35465109d9c4..00b087c14995 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -203,7 +203,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
if (!buf)
return false;
buf->reserved = cpu_to_be32(0);
- memcpy(buf->data, p, len);
+ memcpy(buf->data, p, volume->vcookie->coherency_len);
ret = cachefiles_inject_write_error();
if (ret == 0)
diff --git a/fs/file_table.c b/fs/file_table.c
index 7d2e692b66a9..ada8fe814db9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -412,6 +412,7 @@ void __fput_sync(struct file *file)
}
EXPORT_SYMBOL(fput);
+EXPORT_SYMBOL(__fput_sync);
void __init files_init(void)
{
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index 76316c4a3fb7..b313a978ae0a 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -38,6 +38,3 @@ config FSCACHE_DEBUG
enabled by setting bits in /sys/modules/fscache/parameter/debug.
See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_OLD_API
- bool
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index 2749933852a9..d645f8b302a2 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -214,7 +214,7 @@ void fscache_relinquish_cache(struct fscache_cache *cache)
cache->ops = NULL;
cache->cache_priv = NULL;
- smp_store_release(&cache->state, FSCACHE_CACHE_IS_NOT_PRESENT);
+ fscache_set_cache_state(cache, FSCACHE_CACHE_IS_NOT_PRESENT);
fscache_put_cache(cache, where);
}
EXPORT_SYMBOL(fscache_relinquish_cache);
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 9bb1ab5fe5ed..9d3cf0111709 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -30,7 +30,7 @@ static DEFINE_SPINLOCK(fscache_cookie_lru_lock);
DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out);
static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker);
static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD";
-unsigned int fscache_lru_cookie_timeout = 10 * HZ;
+static unsigned int fscache_lru_cookie_timeout = 10 * HZ;
void fscache_print_cookie(struct fscache_cookie *cookie, char prefix)
{
@@ -1069,6 +1069,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
}
EXPORT_SYMBOL(__fscache_invalidate);
+#ifdef CONFIG_PROC_FS
/*
* Generate a list of extant cookies in /proc/fs/fscache/cookies
*/
@@ -1145,3 +1146,4 @@ const struct seq_operations fscache_cookies_seq_ops = {
.stop = fscache_cookies_seq_stop,
.show = fscache_cookies_seq_show,
};
+#endif
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index ed1c9ed737f2..1336f517e9b1 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -56,7 +56,9 @@ static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
* cookie.c
*/
extern struct kmem_cache *fscache_cookie_jar;
+#ifdef CONFIG_PROC_FS
extern const struct seq_operations fscache_cookies_seq_ops;
+#endif
extern struct timer_list fscache_cookie_lru_timer;
extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
@@ -137,7 +139,9 @@ int fscache_stats_show(struct seq_file *m, void *v);
/*
* volume.c
*/
+#ifdef CONFIG_PROC_FS
extern const struct seq_operations fscache_volumes_seq_ops;
+#endif
struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
enum fscache_volume_trace where);
diff --git a/fs/fscache/io.c b/fs/fscache/io.c
index c8c7fe9e9a6e..3af3b08a9bb3 100644
--- a/fs/fscache/io.c
+++ b/fs/fscache/io.c
@@ -235,8 +235,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
{
struct fscache_write_request *wreq = priv;
- fscache_clear_page_bits(fscache_cres_cookie(&wreq->cache_resources),
- wreq->mapping, wreq->start, wreq->len,
+ fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
wreq->set_bits);
if (wreq->term_func)
@@ -296,7 +295,7 @@ abandon_end:
abandon_free:
kfree(wreq);
abandon:
- fscache_clear_page_bits(cookie, mapping, start, len, cond);
+ fscache_clear_page_bits(mapping, start, len, cond);
if (term_func)
term_func(term_func_priv, ret, false);
}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index dbecd27656c7..04d374e65e54 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
struct io_wq_work {
struct io_wq_work_node list;
unsigned flags;
+ int fd;
};
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a8413f006417..659f8ecba5b7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -63,7 +63,6 @@
#include <net/sock.h>
#include <net/af_unix.h>
#include <net/scm.h>
-#include <net/busy_poll.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@@ -112,8 +111,7 @@
IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS)
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
- REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
- REQ_F_ASYNC_DATA)
+ REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA)
#define IO_TCTX_REFS_CACHE_NR (1U << 10)
@@ -412,11 +410,6 @@ struct io_ring_ctx {
struct list_head sqd_list;
unsigned long check_cq_overflow;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- /* used to track busy poll napi_id */
- struct list_head napi_list;
- spinlock_t napi_lock; /* napi_list lock */
-#endif
struct {
unsigned cached_cq_tail;
@@ -500,7 +493,6 @@ struct io_uring_task {
const struct io_ring_ctx *last;
struct io_wq *io_wq;
struct percpu_counter inflight;
- atomic_t inflight_tracked;
atomic_t in_idle;
spinlock_t task_lock;
@@ -592,7 +584,8 @@ struct io_rw {
/* NOTE: kiocb has the file as the first member, so don't do it here */
struct kiocb kiocb;
u64 addr;
- u64 len;
+ u32 len;
+ u32 flags;
};
struct io_connect {
@@ -654,10 +647,10 @@ struct io_epoll {
struct io_splice {
struct file *file_out;
- struct file *file_in;
loff_t off_out;
loff_t off_in;
u64 len;
+ int splice_fd_in;
unsigned int flags;
};
@@ -1182,8 +1175,11 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
struct io_uring_rsrc_update2 *up,
unsigned nr_args);
static void io_clean_op(struct io_kiocb *req);
-static struct file *io_file_get(struct io_ring_ctx *ctx,
- struct io_kiocb *req, int fd, bool fixed);
+static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
+ unsigned issue_flags);
+static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd);
+static void io_drop_inflight_file(struct io_kiocb *req);
+static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags);
static void __io_queue_sqe(struct io_kiocb *req);
static void io_rsrc_put_work(struct work_struct *work);
@@ -1313,13 +1309,20 @@ static void io_rsrc_refs_refill(struct io_ring_ctx *ctx)
}
static inline void io_req_set_rsrc_node(struct io_kiocb *req,
- struct io_ring_ctx *ctx)
+ struct io_ring_ctx *ctx,
+ unsigned int issue_flags)
{
if (!req->fixed_rsrc_refs) {
req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
- ctx->rsrc_cached_refs--;
- if (unlikely(ctx->rsrc_cached_refs < 0))
- io_rsrc_refs_refill(ctx);
+
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ lockdep_assert_held(&ctx->uring_lock);
+ ctx->rsrc_cached_refs--;
+ if (unlikely(ctx->rsrc_cached_refs < 0))
+ io_rsrc_refs_refill(ctx);
+ } else {
+ percpu_ref_get(req->fixed_rsrc_refs);
+ }
}
}
@@ -1424,29 +1427,9 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
bool cancel_all)
__must_hold(&req->ctx->timeout_lock)
{
- struct io_kiocb *req;
-
if (task && head->task != task)
return false;
- if (cancel_all)
- return true;
-
- io_for_each_link(req, head) {
- if (req->flags & REQ_F_INFLIGHT)
- return true;
- }
- return false;
-}
-
-static bool io_match_linked(struct io_kiocb *head)
-{
- struct io_kiocb *req;
-
- io_for_each_link(req, head) {
- if (req->flags & REQ_F_INFLIGHT)
- return true;
- }
- return false;
+ return cancel_all;
}
/*
@@ -1456,24 +1439,9 @@ static bool io_match_linked(struct io_kiocb *head)
static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
bool cancel_all)
{
- bool matched;
-
if (task && head->task != task)
return false;
- if (cancel_all)
- return true;
-
- if (head->flags & REQ_F_LINK_TIMEOUT) {
- struct io_ring_ctx *ctx = head->ctx;
-
- /* protect against races with linked timeouts */
- spin_lock_irq(&ctx->timeout_lock);
- matched = io_match_linked(head);
- spin_unlock_irq(&ctx->timeout_lock);
- } else {
- matched = io_match_linked(head);
- }
- return matched;
+ return cancel_all;
}
static inline bool req_has_async_data(struct io_kiocb *req)
@@ -1595,10 +1563,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_WQ_LIST(&ctx->locked_free_list);
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
-#ifdef CONFIG_NET_RX_BUSY_POLL
- INIT_LIST_HEAD(&ctx->napi_list);
- spin_lock_init(&ctx->napi_lock);
-#endif
return ctx;
err:
kfree(ctx->dummy_ubuf);
@@ -1636,14 +1600,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req)
return req->flags & REQ_F_FIXED_FILE;
}
-static inline void io_req_track_inflight(struct io_kiocb *req)
-{
- if (!(req->flags & REQ_F_INFLIGHT)) {
- req->flags |= REQ_F_INFLIGHT;
- atomic_inc(&current->io_uring->inflight_tracked);
- }
-}
-
static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
{
if (WARN_ON_ONCE(!req->link))
@@ -1687,14 +1643,6 @@ static void io_prep_async_work(struct io_kiocb *req)
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
-
- switch (req->opcode) {
- case IORING_OP_SPLICE:
- case IORING_OP_TEE:
- if (!S_ISREG(file_inode(req->splice.file_in)->i_mode))
- req->work.flags |= IO_WQ_WORK_UNBOUND;
- break;
- }
}
static void io_prep_async_link(struct io_kiocb *req)
@@ -1788,12 +1736,11 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
__must_hold(&ctx->completion_lock)
{
u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+ struct io_kiocb *req, *tmp;
spin_lock_irq(&ctx->timeout_lock);
- while (!list_empty(&ctx->timeout_list)) {
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
u32 events_needed, events_got;
- struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
- struct io_kiocb, timeout.list);
if (io_is_timeout_noseq(req))
break;
@@ -1810,7 +1757,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
if (events_got < events_needed)
break;
- list_del_init(&req->timeout.list);
io_kill_timeout(req, 0);
}
ctx->cq_last_tm_flush = seq;
@@ -2562,6 +2508,8 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
WARN_ON_ONCE(!tctx);
+ io_drop_inflight_file(req);
+
spin_lock_irqsave(&tctx->task_lock, flags);
if (priority)
wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list);
@@ -3186,42 +3134,11 @@ static inline bool io_file_supports_nowait(struct io_kiocb *req)
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
struct kiocb *kiocb = &req->rw.kiocb;
- struct file *file = req->file;
unsigned ioprio;
int ret;
- if (!io_req_ffs_set(req))
- req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
-
kiocb->ki_pos = READ_ONCE(sqe->off);
- kiocb->ki_flags = iocb_flags(file);
- ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
- if (unlikely(ret))
- return ret;
-
- /*
- * If the file is marked O_NONBLOCK, still allow retry for it if it
- * supports async. Otherwise it's impossible to use O_NONBLOCK files
- * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
- */
- if ((kiocb->ki_flags & IOCB_NOWAIT) ||
- ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
- req->flags |= REQ_F_NOWAIT;
-
- if (ctx->flags & IORING_SETUP_IOPOLL) {
- if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
- return -EOPNOTSUPP;
-
- kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
- kiocb->ki_complete = io_complete_rw_iopoll;
- req->iopoll_completed = 0;
- } else {
- if (kiocb->ki_flags & IOCB_HIPRI)
- return -EINVAL;
- kiocb->ki_complete = io_complete_rw;
- }
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
@@ -3237,6 +3154,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->imu = NULL;
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
+ req->rw.flags = READ_ONCE(sqe->rw_flags);
req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
@@ -3367,7 +3285,8 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter
return 0;
}
-static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
+static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
+ unsigned int issue_flags)
{
struct io_mapped_ubuf *imu = req->imu;
u16 index, buf_index = req->buf_index;
@@ -3377,7 +3296,7 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT;
- io_req_set_rsrc_node(req, ctx);
+ io_req_set_rsrc_node(req, ctx, issue_flags);
index = array_index_nospec(buf_index, ctx->nr_user_bufs);
imu = READ_ONCE(ctx->user_bufs[index]);
req->imu = imu;
@@ -3539,7 +3458,7 @@ static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req,
ssize_t ret;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
- ret = io_import_fixed(req, rw, iter);
+ ret = io_import_fixed(req, rw, iter, issue_flags);
if (ret)
return ERR_PTR(ret);
return NULL;
@@ -3740,13 +3659,6 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
return 0;
}
-static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- if (unlikely(!(req->file->f_mode & FMODE_READ)))
- return -EBADF;
- return io_prep_rw(req, sqe);
-}
-
/*
* This is our waitqueue callback handler, registered through __folio_lock_async()
* when we initially tried to do the IO with the iocb armed our waitqueue.
@@ -3834,6 +3746,49 @@ static bool need_read_all(struct io_kiocb *req)
S_ISBLK(file_inode(req->file)->i_mode);
}
+static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
+{
+ struct kiocb *kiocb = &req->rw.kiocb;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct file *file = req->file;
+ int ret;
+
+ if (unlikely(!file || !(file->f_mode & mode)))
+ return -EBADF;
+
+ if (!io_req_ffs_set(req))
+ req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
+
+ kiocb->ki_flags = iocb_flags(file);
+ ret = kiocb_set_rw_flags(kiocb, req->rw.flags);
+ if (unlikely(ret))
+ return ret;
+
+ /*
+ * If the file is marked O_NONBLOCK, still allow retry for it if it
+ * supports async. Otherwise it's impossible to use O_NONBLOCK files
+ * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
+ */
+ if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+ ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
+ req->flags |= REQ_F_NOWAIT;
+
+ if (ctx->flags & IORING_SETUP_IOPOLL) {
+ if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
+ return -EOPNOTSUPP;
+
+ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
+ kiocb->ki_complete = io_complete_rw_iopoll;
+ req->iopoll_completed = 0;
+ } else {
+ if (kiocb->ki_flags & IOCB_HIPRI)
+ return -EINVAL;
+ kiocb->ki_complete = io_complete_rw;
+ }
+
+ return 0;
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_rw_state __s, *s = &__s;
@@ -3869,6 +3824,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_restore(&s->iter, &s->iter_state);
iovec = NULL;
}
+ ret = io_rw_init_file(req, FMODE_READ);
+ if (unlikely(ret))
+ return ret;
req->result = iov_iter_count(&s->iter);
if (force_nonblock) {
@@ -3972,13 +3930,6 @@ out_free:
return 0;
}
-static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
- return -EBADF;
- return io_prep_rw(req, sqe);
-}
-
static int io_write(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_rw_state __s, *s = &__s;
@@ -3999,6 +3950,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_restore(&s->iter, &s->iter_state);
iovec = NULL;
}
+ ret = io_rw_init_file(req, FMODE_WRITE);
+ if (unlikely(ret))
+ return ret;
req->result = iov_iter_count(&s->iter);
if (force_nonblock) {
@@ -4369,18 +4323,11 @@ static int __io_splice_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- sp->file_in = NULL;
sp->len = READ_ONCE(sqe->len);
sp->flags = READ_ONCE(sqe->splice_flags);
-
if (unlikely(sp->flags & ~valid_flags))
return -EINVAL;
-
- sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in),
- (sp->flags & SPLICE_F_FD_IN_FIXED));
- if (!sp->file_in)
- return -EBADF;
- req->flags |= REQ_F_NEED_CLEANUP;
+ sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in);
return 0;
}
@@ -4395,20 +4342,29 @@ static int io_tee_prep(struct io_kiocb *req,
static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_splice *sp = &req->splice;
- struct file *in = sp->file_in;
struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+ struct file *in;
long ret = 0;
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;
+
+ if (sp->flags & SPLICE_F_FD_IN_FIXED)
+ in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED);
+ else
+ in = io_file_get_normal(req, sp->splice_fd_in);
+ if (!in) {
+ ret = -EBADF;
+ goto done;
+ }
+
if (sp->len)
ret = do_tee(in, out, sp->len, flags);
if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
io_put_file(in);
- req->flags &= ~REQ_F_NEED_CLEANUP;
-
+done:
if (ret != sp->len)
req_set_fail(req);
io_req_complete(req, ret);
@@ -4427,15 +4383,24 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_splice *sp = &req->splice;
- struct file *in = sp->file_in;
struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
loff_t *poff_in, *poff_out;
+ struct file *in;
long ret = 0;
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;
+ if (sp->flags & SPLICE_F_FD_IN_FIXED)
+ in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED);
+ else
+ in = io_file_get_normal(req, sp->splice_fd_in);
+ if (!in) {
+ ret = -EBADF;
+ goto done;
+ }
+
poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
@@ -4444,8 +4409,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
io_put_file(in);
- req->flags &= ~REQ_F_NEED_CLEANUP;
-
+done:
if (ret != sp->len)
req_set_fail(req);
io_req_complete(req, ret);
@@ -4513,9 +4477,6 @@ static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
- if (!req->file)
- return -EBADF;
-
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
@@ -5757,108 +5718,6 @@ IO_NETOP_FN(send);
IO_NETOP_FN(recv);
#endif /* CONFIG_NET */
-#ifdef CONFIG_NET_RX_BUSY_POLL
-
-#define NAPI_TIMEOUT (60 * SEC_CONVERSION)
-
-struct napi_entry {
- struct list_head list;
- unsigned int napi_id;
- unsigned long timeout;
-};
-
-/*
- * Add busy poll NAPI ID from sk.
- */
-static void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
-{
- unsigned int napi_id;
- struct socket *sock;
- struct sock *sk;
- struct napi_entry *ne;
-
- if (!net_busy_loop_on())
- return;
-
- sock = sock_from_file(file);
- if (!sock)
- return;
-
- sk = sock->sk;
- if (!sk)
- return;
-
- napi_id = READ_ONCE(sk->sk_napi_id);
-
- /* Non-NAPI IDs can be rejected */
- if (napi_id < MIN_NAPI_ID)
- return;
-
- spin_lock(&ctx->napi_lock);
- list_for_each_entry(ne, &ctx->napi_list, list) {
- if (ne->napi_id == napi_id) {
- ne->timeout = jiffies + NAPI_TIMEOUT;
- goto out;
- }
- }
-
- ne = kmalloc(sizeof(*ne), GFP_NOWAIT);
- if (!ne)
- goto out;
-
- ne->napi_id = napi_id;
- ne->timeout = jiffies + NAPI_TIMEOUT;
- list_add_tail(&ne->list, &ctx->napi_list);
-out:
- spin_unlock(&ctx->napi_lock);
-}
-
-static inline void io_check_napi_entry_timeout(struct napi_entry *ne)
-{
- if (time_after(jiffies, ne->timeout)) {
- list_del(&ne->list);
- kfree(ne);
- }
-}
-
-/*
- * Busy poll if globally on and supporting sockets found
- */
-static bool io_napi_busy_loop(struct list_head *napi_list)
-{
- struct napi_entry *ne, *n;
-
- list_for_each_entry_safe(ne, n, napi_list, list) {
- napi_busy_loop(ne->napi_id, NULL, NULL, true,
- BUSY_POLL_BUDGET);
- io_check_napi_entry_timeout(ne);
- }
- return !list_empty(napi_list);
-}
-
-static void io_free_napi_list(struct io_ring_ctx *ctx)
-{
- spin_lock(&ctx->napi_lock);
- while (!list_empty(&ctx->napi_list)) {
- struct napi_entry *ne =
- list_first_entry(&ctx->napi_list, struct napi_entry,
- list);
-
- list_del(&ne->list);
- kfree(ne);
- }
- spin_unlock(&ctx->napi_lock);
-}
-#else
-static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
-{
-}
-
-static inline void io_free_napi_list(struct io_ring_ctx *ctx)
-{
-}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
-
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
@@ -5972,7 +5831,7 @@ static void io_poll_remove_entries(struct io_kiocb *req)
* either spurious wakeup or multishot CQE is served. 0 when it's done with
* the request, then the mask is stored in req->result.
*/
-static int io_poll_check_events(struct io_kiocb *req)
+static int io_poll_check_events(struct io_kiocb *req, bool locked)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_poll_iocb *poll = io_poll_get_single(req);
@@ -5994,7 +5853,10 @@ static int io_poll_check_events(struct io_kiocb *req)
if (!req->result) {
struct poll_table_struct pt = { ._key = req->cflags };
- req->result = vfs_poll(req->file, &pt) & req->cflags;
+ if (unlikely(!io_assign_file(req, IO_URING_F_UNLOCKED)))
+ req->result = -EBADF;
+ else
+ req->result = vfs_poll(req->file, &pt) & req->cflags;
}
/* multishot, just fill an CQE and proceed */
@@ -6010,7 +5872,6 @@ static int io_poll_check_events(struct io_kiocb *req)
if (unlikely(!filled))
return -ECANCELED;
io_cqring_ev_posted(ctx);
- io_add_napi(req->file, ctx);
} else if (req->result) {
return 0;
}
@@ -6029,7 +5890,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_poll_check_events(req);
+ ret = io_poll_check_events(req, *locked);
if (ret > 0)
return;
@@ -6054,7 +5915,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_poll_check_events(req);
+ ret = io_poll_check_events(req, *locked);
if (ret > 0)
return;
@@ -6261,7 +6122,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
__io_poll_execute(req, mask, poll->events);
return 0;
}
- io_add_napi(req->file, req->ctx);
/*
* Release ownership. If someone tried to queue a tw while it was
@@ -6766,6 +6626,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
return -EINVAL;
+ INIT_LIST_HEAD(&req->timeout.list);
data->mode = io_translate_timeout_mode(flags);
hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
@@ -6992,11 +6853,10 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
case IORING_OP_READ:
- return io_read_prep(req, sqe);
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
case IORING_OP_WRITE:
- return io_write_prep(req, sqe);
+ return io_prep_rw(req, sqe);
case IORING_OP_POLL_ADD:
return io_poll_add_prep(req, sqe);
case IORING_OP_POLL_REMOVE:
@@ -7179,11 +7039,6 @@ static void io_clean_op(struct io_kiocb *req)
kfree(io->free_iov);
break;
}
- case IORING_OP_SPLICE:
- case IORING_OP_TEE:
- if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED))
- io_put_file(req->splice.file_in);
- break;
case IORING_OP_OPENAT:
case IORING_OP_OPENAT2:
if (req->open.filename)
@@ -7218,11 +7073,6 @@ static void io_clean_op(struct io_kiocb *req)
kfree(req->apoll);
req->apoll = NULL;
}
- if (req->flags & REQ_F_INFLIGHT) {
- struct io_uring_task *tctx = req->task->io_uring;
-
- atomic_dec(&tctx->inflight_tracked);
- }
if (req->flags & REQ_F_CREDS)
put_cred(req->creds);
if (req->flags & REQ_F_ASYNC_DATA) {
@@ -7232,6 +7082,23 @@ static void io_clean_op(struct io_kiocb *req)
req->flags &= ~IO_REQ_CLEAN_FLAGS;
}
+static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
+{
+ if (req->file || !io_op_defs[req->opcode].needs_file)
+ return true;
+
+ if (req->flags & REQ_F_FIXED_FILE)
+ req->file = io_file_get_fixed(req, req->work.fd, issue_flags);
+ else
+ req->file = io_file_get_normal(req, req->work.fd);
+ if (req->file)
+ return true;
+
+ req_set_fail(req);
+ req->result = -EBADF;
+ return false;
+}
+
static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
{
const struct cred *creds = NULL;
@@ -7242,6 +7109,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
if (!io_op_defs[req->opcode].audit_skip)
audit_uring_entry(req->opcode);
+ if (unlikely(!io_assign_file(req, issue_flags)))
+ return -EBADF;
switch (req->opcode) {
case IORING_OP_NOP:
@@ -7386,10 +7255,11 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
static void io_wq_submit_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ const struct io_op_def *def = &io_op_defs[req->opcode];
unsigned int issue_flags = IO_URING_F_UNLOCKED;
bool needs_poll = false;
struct io_kiocb *timeout;
- int ret = 0;
+ int ret = 0, err = -ECANCELED;
/* one will be dropped by ->io_free_work() after returning to io-wq */
if (!(req->flags & REQ_F_REFCOUNT))
@@ -7401,14 +7271,18 @@ static void io_wq_submit_work(struct io_wq_work *work)
if (timeout)
io_queue_linked_timeout(timeout);
+ if (!io_assign_file(req, issue_flags)) {
+ err = -EBADF;
+ work->flags |= IO_WQ_WORK_CANCEL;
+ }
+
/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
if (work->flags & IO_WQ_WORK_CANCEL) {
- io_req_task_queue_fail(req, -ECANCELED);
+ io_req_task_queue_fail(req, err);
return;
}
if (req->flags & REQ_F_FORCE_ASYNC) {
- const struct io_op_def *def = &io_op_defs[req->opcode];
bool opcode_poll = def->pollin || def->pollout;
if (opcode_poll && file_can_poll(req->file)) {
@@ -7465,46 +7339,56 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file
file_slot->file_ptr = file_ptr;
}
-static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
- struct io_kiocb *req, int fd)
+static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
+ unsigned int issue_flags)
{
- struct file *file;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct file *file = NULL;
unsigned long file_ptr;
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ mutex_lock(&ctx->uring_lock);
+
if (unlikely((unsigned int)fd >= ctx->nr_user_files))
- return NULL;
+ goto out;
fd = array_index_nospec(fd, ctx->nr_user_files);
file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
file = (struct file *) (file_ptr & FFS_MASK);
file_ptr &= ~FFS_MASK;
/* mask in overlapping REQ_F and FFS bits */
req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
- io_req_set_rsrc_node(req, ctx);
+ io_req_set_rsrc_node(req, ctx, 0);
+out:
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ mutex_unlock(&ctx->uring_lock);
return file;
}
-static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
- struct io_kiocb *req, int fd)
+/*
+ * Drop the file for requeue operations. Only used of req->file is the
+ * io_uring descriptor itself.
+ */
+static void io_drop_inflight_file(struct io_kiocb *req)
+{
+ if (unlikely(req->flags & REQ_F_INFLIGHT)) {
+ fput(req->file);
+ req->file = NULL;
+ req->flags &= ~REQ_F_INFLIGHT;
+ }
+}
+
+static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
{
struct file *file = fget(fd);
- trace_io_uring_file_get(ctx, req, req->user_data, fd);
+ trace_io_uring_file_get(req->ctx, req, req->user_data, fd);
/* we don't allow fixed io_uring files */
- if (file && unlikely(file->f_op == &io_uring_fops))
- io_req_track_inflight(req);
+ if (file && file->f_op == &io_uring_fops)
+ req->flags |= REQ_F_INFLIGHT;
return file;
}
-static inline struct file *io_file_get(struct io_ring_ctx *ctx,
- struct io_kiocb *req, int fd, bool fixed)
-{
- if (fixed)
- return io_file_get_fixed(ctx, req, fd);
- else
- return io_file_get_normal(ctx, req, fd);
-}
-
static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
{
struct io_kiocb *prev = req->timeout.prev;
@@ -7744,6 +7628,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (io_op_defs[opcode].needs_file) {
struct io_submit_state *state = &ctx->submit_state;
+ req->work.fd = READ_ONCE(sqe->fd);
+
/*
* Plug now if we have more than 2 IO left after this, and the
* target is potentially a read/write to block based storage.
@@ -7753,11 +7639,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
state->need_plug = false;
blk_start_plug_nr_ios(&state->plug, state->submit_nr);
}
-
- req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
- (sqe_flags & IOSQE_FIXED_FILE));
- if (unlikely(!req->file))
- return -EBADF;
}
personality = READ_ONCE(sqe->personality);
@@ -8032,13 +7913,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
!(ctx->flags & IORING_SETUP_R_DISABLED))
ret = io_submit_sqes(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
-#ifdef CONFIG_NET_RX_BUSY_POLL
- spin_lock(&ctx->napi_lock);
- if (!list_empty(&ctx->napi_list) &&
- io_napi_busy_loop(&ctx->napi_list))
- ++ret;
- spin_unlock(&ctx->napi_lock);
-#endif
+
if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
wake_up(&ctx->sqo_sq_wait);
if (creds)
@@ -8176,9 +8051,6 @@ struct io_wait_queue {
struct io_ring_ctx *ctx;
unsigned cq_tail;
unsigned nr_timeouts;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- unsigned busy_poll_to;
-#endif
};
static inline bool io_should_wake(struct io_wait_queue *iowq)
@@ -8240,87 +8112,6 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
return 1;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
-static void io_adjust_busy_loop_timeout(struct timespec64 *ts,
- struct io_wait_queue *iowq)
-{
- unsigned busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
- struct timespec64 pollto = ns_to_timespec64(1000 * (s64)busy_poll_to);
-
- if (timespec64_compare(ts, &pollto) > 0) {
- *ts = timespec64_sub(*ts, pollto);
- iowq->busy_poll_to = busy_poll_to;
- } else {
- u64 to = timespec64_to_ns(ts);
-
- do_div(to, 1000);
- iowq->busy_poll_to = to;
- ts->tv_sec = 0;
- ts->tv_nsec = 0;
- }
-}
-
-static inline bool io_busy_loop_timeout(unsigned long start_time,
- unsigned long bp_usec)
-{
- if (bp_usec) {
- unsigned long end_time = start_time + bp_usec;
- unsigned long now = busy_loop_current_time();
-
- return time_after(now, end_time);
- }
- return true;
-}
-
-static bool io_busy_loop_end(void *p, unsigned long start_time)
-{
- struct io_wait_queue *iowq = p;
-
- return signal_pending(current) ||
- io_should_wake(iowq) ||
- io_busy_loop_timeout(start_time, iowq->busy_poll_to);
-}
-
-static void io_blocking_napi_busy_loop(struct list_head *napi_list,
- struct io_wait_queue *iowq)
-{
- unsigned long start_time =
- list_is_singular(napi_list) ? 0 :
- busy_loop_current_time();
-
- do {
- if (list_is_singular(napi_list)) {
- struct napi_entry *ne =
- list_first_entry(napi_list,
- struct napi_entry, list);
-
- napi_busy_loop(ne->napi_id, io_busy_loop_end, iowq,
- true, BUSY_POLL_BUDGET);
- io_check_napi_entry_timeout(ne);
- break;
- }
- } while (io_napi_busy_loop(napi_list) &&
- !io_busy_loop_end(iowq, start_time));
-}
-
-static void io_putback_napi_list(struct io_ring_ctx *ctx,
- struct list_head *napi_list)
-{
- struct napi_entry *cne, *lne;
-
- spin_lock(&ctx->napi_lock);
- list_for_each_entry(cne, &ctx->napi_list, list)
- list_for_each_entry(lne, napi_list, list)
- if (cne->napi_id == lne->napi_id) {
- list_del(&lne->list);
- kfree(lne);
- break;
- }
- list_splice(napi_list, &ctx->napi_list);
- spin_unlock(&ctx->napi_lock);
-}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
-
/*
* Wait until events become available, if we don't already have some. The
* application must reap them itself, as they reside on the shared cq ring.
@@ -8333,9 +8124,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
struct io_rings *rings = ctx->rings;
ktime_t timeout = KTIME_MAX;
int ret;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- LIST_HEAD(local_napi_list);
-#endif
do {
io_cqring_overflow_flush(ctx);
@@ -8358,29 +8146,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
- iowq.busy_poll_to = 0;
- if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
- spin_lock(&ctx->napi_lock);
- list_splice_init(&ctx->napi_list, &local_napi_list);
- spin_unlock(&ctx->napi_lock);
- }
-#endif
if (uts) {
struct timespec64 ts;
if (get_timespec64(&ts, uts))
return -EFAULT;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- if (!list_empty(&local_napi_list))
- io_adjust_busy_loop_timeout(&ts, &iowq);
-#endif
timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
- else if (!list_empty(&local_napi_list))
- iowq.busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
-#endif
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
iowq.wq.private = current;
@@ -8390,12 +8162,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
trace_io_uring_cqring_wait(ctx, min_events);
-#ifdef CONFIG_NET_RX_BUSY_POLL
- if (iowq.busy_poll_to)
- io_blocking_napi_busy_loop(&local_napi_list, &iowq);
- if (!list_empty(&local_napi_list))
- io_putback_napi_list(ctx, &local_napi_list);
-#endif
do {
/* if we can't even flush overflow, don't wait for more */
if (!io_cqring_overflow_flush(ctx)) {
@@ -8864,8 +8630,12 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
skb_queue_head(&sk->sk_receive_queue, skb);
- for (i = 0; i < nr_files; i++)
- fput(fpl->fp[i]);
+ for (i = 0; i < nr; i++) {
+ struct file *file = io_file_from_index(ctx, i + offset);
+
+ if (file)
+ fput(file);
+ }
} else {
kfree_skb(skb);
free_uid(fpl->user);
@@ -9156,13 +8926,15 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
struct io_rsrc_node *node, void *rsrc)
{
+ u64 *tag_slot = io_get_tag_slot(data, idx);
struct io_rsrc_put *prsrc;
prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
if (!prsrc)
return -ENOMEM;
- prsrc->tag = *io_get_tag_slot(data, idx);
+ prsrc->tag = *tag_slot;
+ *tag_slot = 0;
prsrc->rsrc = rsrc;
list_add(&prsrc->list, &node->rsrc_list);
return 0;
@@ -9231,7 +9003,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
struct io_fixed_file *file_slot;
struct file *file;
- int ret, i;
+ int ret;
io_ring_submit_lock(ctx, needs_lock);
ret = -ENXIO;
@@ -9244,8 +9016,8 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
if (ret)
goto out;
- i = array_index_nospec(offset, ctx->nr_user_files);
- file_slot = io_fixed_file_slot(&ctx->file_table, i);
+ offset = array_index_nospec(offset, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, offset);
ret = -EBADF;
if (!file_slot->file_ptr)
goto out;
@@ -9301,8 +9073,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (file_slot->file_ptr) {
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- err = io_queue_rsrc_removal(data, up->offset + done,
- ctx->rsrc_node, file);
+ err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file);
if (err)
break;
file_slot->file_ptr = 0;
@@ -9327,7 +9098,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
- *io_get_tag_slot(data, up->offset + done) = tag;
+ *io_get_tag_slot(data, i) = tag;
io_fixed_file_set(file_slot, file);
err = io_sqe_file_register(ctx, file, i);
if (err) {
@@ -9411,7 +9182,6 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
atomic_set(&tctx->in_idle, 0);
- atomic_set(&tctx->inflight_tracked, 0);
task->io_uring = tctx;
spin_lock_init(&tctx->task_lock);
INIT_WQ_LIST(&tctx->task_list);
@@ -9986,7 +9756,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
i = array_index_nospec(offset, ctx->nr_user_bufs);
if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
- err = io_queue_rsrc_removal(ctx->buf_data, offset,
+ err = io_queue_rsrc_removal(ctx->buf_data, i,
ctx->rsrc_node, ctx->user_bufs[i]);
if (unlikely(err)) {
io_buffer_unmap(ctx, &imu);
@@ -10181,7 +9951,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_req_caches_free(ctx);
if (ctx->hash_map)
io_wq_put_hash(ctx->hash_map);
- io_free_napi_list(ctx);
kfree(ctx->cancel_hash);
kfree(ctx->dummy_ubuf);
kfree(ctx->io_buffers);
@@ -10604,7 +10373,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx)
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
if (tracked)
- return atomic_read(&tctx->inflight_tracked);
+ return 0;
return percpu_counter_sum(&tctx->inflight);
}
@@ -11707,7 +11476,15 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
if (len > cpumask_size())
len = cpumask_size();
- if (copy_from_user(new_mask, arg, len)) {
+ if (in_compat_syscall()) {
+ ret = compat_get_bitmap(cpumask_bits(new_mask),
+ (const compat_ulong_t __user *)arg,
+ len * 8 /* CHAR_BIT */);
+ } else {
+ ret = copy_from_user(new_mask, arg, len);
+ }
+
+ if (ret) {
free_cpumask_var(new_mask);
return -EFAULT;
}
diff --git a/fs/namei.c b/fs/namei.c
index 3f1829b3ab5b..509657fdf4f5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3673,18 +3673,14 @@ static struct dentry *filename_create(int dfd, struct filename *name,
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct qstr last;
+ bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
+ unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
+ unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
int type;
int err2;
int error;
- bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
- /*
- * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
- * other flags passed in are ignored!
- */
- lookup_flags &= LOOKUP_REVAL;
-
- error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
if (error)
return ERR_PTR(error);
@@ -3698,11 +3694,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
/* don't fail immediately if it's r/o, at least try to report other errors */
err2 = mnt_want_write(path->mnt);
/*
- * Do the final lookup.
+ * Do the final lookup. Suppress 'create' if there is a trailing
+ * '/', and a directory wasn't requested.
*/
- lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+ if (last.name[last.len] && !want_dir)
+ create_flags = 0;
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
- dentry = __lookup_hash(&last, path->dentry, lookup_flags);
+ dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
if (IS_ERR(dentry))
goto unlock;
@@ -3716,7 +3714,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
- if (unlikely(!is_dir && last.name[last.len])) {
+ if (unlikely(!create_flags)) {
error = -ENOENT;
goto fail;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 47a53b3362b6..14a72224b657 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -4,10 +4,6 @@ config NFS_FS
depends on INET && FILE_LOCKING && MULTIUSER
select LOCKD
select SUNRPC
- select CRYPTO
- select CRYPTO_HASH
- select XXHASH
- select CRYPTO_XXHASH
select NFS_ACL_SUPPORT if NFS_V3_ACL
help
Choose Y here if you want to access files residing on other
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bac4cf1a308e..c6b263b5faf1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -39,7 +39,7 @@
#include <linux/sched.h>
#include <linux/kmemleak.h>
#include <linux/xattr.h>
-#include <linux/xxhash.h>
+#include <linux/hash.h>
#include "delegation.h"
#include "iostat.h"
@@ -350,10 +350,7 @@ out:
* of directory cookies. Content is addressed by the value of the
* cookie index of the first readdir entry in a page.
*
- * The xxhash algorithm is chosen because it is fast, and is supposed
- * to result in a decent flat distribution of hashes.
- *
- * We then select only the first 18 bits to avoid issues with excessive
+ * We select only the first 18 bits to avoid issues with excessive
* memory use for the page cache XArray. 18 bits should allow the caching
* of 262144 pages of sequences of readdir entries. Since each page holds
* 127 readdir entries for a typical 64-bit system, that works out to a
@@ -363,7 +360,7 @@ static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
{
if (cookie == 0)
return 0;
- return xxhash(&cookie, sizeof(cookie), 0) & NFS_READDIR_COOKIE_MASK;
+ return hash_64(cookie, 18);
}
static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
@@ -1991,16 +1988,6 @@ const struct dentry_operations nfs4_dentry_operations = {
};
EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
-static fmode_t flags_to_mode(int flags)
-{
- fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
- if ((flags & O_ACCMODE) != O_WRONLY)
- res |= FMODE_READ;
- if ((flags & O_ACCMODE) != O_RDONLY)
- res |= FMODE_WRITE;
- return res;
-}
-
static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
{
return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7eb3b08d702f..b4e46b0ffa2d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1180,7 +1180,6 @@ int nfs_open(struct inode *inode, struct file *filp)
nfs_fscache_open_file(inode, filp);
return 0;
}
-EXPORT_SYMBOL_GPL(nfs_open);
/*
* This function is called whenever some part of NFS notices that
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 57b0497105c8..7eefa16ed381 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
return true;
}
+static inline fmode_t flags_to_mode(int flags)
+{
+ fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
+ if ((flags & O_ACCMODE) != O_WRONLY)
+ res |= FMODE_READ;
+ if ((flags & O_ACCMODE) != O_RDONLY)
+ res |= FMODE_WRITE;
+ return res;
+}
+
/*
* Note: RFC 1813 doesn't limit the number of auth flavors that
* a server can return, so make something up.
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index ad3405c64b9e..e7b34f7e0614 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -997,7 +997,7 @@ int __init nfs4_xattr_cache_init(void)
nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
sizeof(struct nfs4_xattr_cache), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
nfs4_xattr_cache_init_once);
if (nfs4_xattr_cache_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index d258933cf8c8..7b861e4f0533 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
struct dentry *parent = NULL;
struct inode *dir;
unsigned openflags = filp->f_flags;
+ fmode_t f_mode;
struct iattr attr;
int err;
@@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
+ f_mode = filp->f_mode;
if ((openflags & O_ACCMODE) == 3)
- return nfs_open(inode, filp);
+ f_mode |= flags_to_mode(openflags);
/* We can't create new files here */
openflags &= ~(O_CREAT|O_EXCL);
@@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
parent = dget_parent(dentry);
dir = d_inode(parent);
- ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp);
+ ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp);
err = PTR_ERR(ctx);
if (IS_ERR(ctx))
goto out;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e3f5b380cefe..16106f805ffa 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9615,6 +9615,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return ERR_CAST(task);
status = rpc_wait_for_completion_task(task);
if (status != 0)
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 5fa11e1aca4c..6f325e10056c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -347,6 +347,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return ERR_PTR(-ENOMEM);
+ task_setup_data.task = &data->task;
task_setup_data.callback_data = data;
data->cred = get_current_cred();
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index c08882f5867b..2c1b027774d4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -237,6 +237,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
}
static void
+nfsd_file_flush(struct nfsd_file *nf)
+{
+ if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+}
+
+static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
@@ -295,19 +302,15 @@ nfsd_file_put_noref(struct nfsd_file *nf)
void
nfsd_file_put(struct nfsd_file *nf)
{
- bool is_hashed;
-
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
- if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
+ nfsd_file_flush(nf);
nfsd_file_put_noref(nf);
- return;
+ } else {
+ nfsd_file_put_noref(nf);
+ if (nf->nf_file)
+ nfsd_file_schedule_laundrette();
}
-
- filemap_flush(nf->nf_file->f_mapping);
- is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
- nfsd_file_put_noref(nf);
- if (is_hashed)
- nfsd_file_schedule_laundrette();
if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
nfsd_file_gc();
}
@@ -328,6 +331,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
while(!list_empty(dispose)) {
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
list_del(&nf->nf_lru);
+ nfsd_file_flush(nf);
nfsd_file_put_noref(nf);
}
}
@@ -341,6 +345,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
while(!list_empty(dispose)) {
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
list_del(&nf->nf_lru);
+ nfsd_file_flush(nf);
if (!refcount_dec_and_test(&nf->nf_ref))
continue;
if (nfsd_file_free(nf))
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 367551bddfc6..b5760801d377 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -249,34 +249,34 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
int w;
if (!svcxdr_encode_stat(xdr, resp->status))
- return 0;
+ return false;
if (dentry == NULL || d_really_is_negative(dentry))
- return 1;
+ return true;
inode = d_inode(dentry);
if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
- return 0;
+ return false;
if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
- return 0;
+ return false;
rqstp->rq_res.page_len = w = nfsacl_size(
(resp->mask & NFS_ACL) ? resp->acl_access : NULL,
(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
while (w > 0) {
if (!*(rqstp->rq_next_page++))
- return 1;
+ return true;
w -= PAGE_SIZE;
}
if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
resp->mask & NFS_ACL, 0))
- return 0;
+ return false;
if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
resp->mask & NFS_DFACL, NFS_ACL_DEFAULT))
- return 0;
+ return false;
- return 1;
+ return true;
}
/* ACCESS */
@@ -286,17 +286,17 @@ nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
struct nfsd3_accessres *resp = rqstp->rq_resp;
if (!svcxdr_encode_stat(xdr, resp->status))
- return 0;
+ return false;
switch (resp->status) {
case nfs_ok:
if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
- return 0;
+ return false;
if (xdr_stream_encode_u32(xdr, resp->access) < 0)
- return 0;
+ return false;
break;
}
- return 1;
+ return true;
}
/*
diff --git a/fs/stat.c b/fs/stat.c
index 7f734be0e57e..5c2c94464e8b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -348,9 +348,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
# define choose_32_64(a,b) b
#endif
-#define valid_dev(x) choose_32_64(old_valid_dev(x),true)
-#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)
-
#ifndef INIT_STRUCT_STAT_PADDING
# define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
#endif
@@ -359,7 +356,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
{
struct stat tmp;
- if (!valid_dev(stat->dev) || !valid_dev(stat->rdev))
+ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+ return -EOVERFLOW;
+ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
return -EOVERFLOW;
#if BITS_PER_LONG == 32
if (stat->size > MAX_NON_LFS)
@@ -367,7 +366,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
#endif
INIT_STRUCT_STAT_PADDING(tmp);
- tmp.st_dev = encode_dev(stat->dev);
+ tmp.st_dev = new_encode_dev(stat->dev);
tmp.st_ino = stat->ino;
if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
return -EOVERFLOW;
@@ -377,7 +376,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
return -EOVERFLOW;
SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
- tmp.st_rdev = encode_dev(stat->rdev);
+ tmp.st_rdev = new_encode_dev(stat->rdev);
tmp.st_size = stat->size;
tmp.st_atime = stat->atime.tv_sec;
tmp.st_mtime = stat->mtime.tv_sec;
@@ -665,11 +664,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
{
struct compat_stat tmp;
- if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+ return -EOVERFLOW;
+ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
return -EOVERFLOW;
memset(&tmp, 0, sizeof(tmp));
- tmp.st_dev = old_encode_dev(stat->dev);
+ tmp.st_dev = new_encode_dev(stat->dev);
tmp.st_ino = stat->ino;
if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
return -EOVERFLOW;
@@ -679,7 +680,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
return -EOVERFLOW;
SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
- tmp.st_rdev = old_encode_dev(stat->rdev);
+ tmp.st_rdev = new_encode_dev(stat->rdev);
if ((u64) stat->size > MAX_NON_LFS)
return -EOVERFLOW;
tmp.st_size = stat->size;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 42dcf96881b6..a12ac0356c69 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -703,19 +703,6 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid)
ktype = get_ktype(kobj);
if (ktype) {
- struct attribute **kattr;
-
- /*
- * Change owner of the default attributes associated with the
- * ktype of @kobj.
- */
- for (kattr = ktype->default_attrs; kattr && *kattr; kattr++) {
- error = sysfs_file_change_owner(kobj, (*kattr)->name,
- kuid, kgid);
- if (error)
- return error;
- }
-
/*
* Change owner of the default groups associated with the
* ktype of @kobj.