summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt5
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/cell.c6
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/rxrpc.c18
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/aio.c39
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/btrfs/backref.c1
-rw-r--r--fs/btrfs/extent-tree.c14
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/raid56.c1
-rw-r--r--fs/btrfs/sysfs.c8
-rw-r--r--fs/btrfs/transaction.c20
-rw-r--r--fs/ceph/file.c9
-rw-r--r--fs/d_path.c470
-rw-r--r--fs/dcache.c971
-rw-r--r--fs/dcookies.c11
-rw-r--r--fs/debugfs/inode.c5
-rw-r--r--fs/dlm/lowcomms.c7
-rw-r--r--fs/eventfd.c9
-rw-r--r--fs/eventpoll.c23
-rw-r--r--fs/fcntl.c12
-rw-r--r--fs/file.c17
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/hostfs/hostfs.h2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hostfs/hostfs_user.c2
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/internal.h14
-rw-r--r--fs/ioctl.c7
-rw-r--r--fs/minix/Kconfig2
-rw-r--r--fs/namei.c107
-rw-r--r--fs/namespace.c19
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/pagelist.c6
-rw-r--r--fs/nfs/pnfs.c13
-rw-r--r--fs/nfs/pnfs_nfs.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs/write.c89
-rw-r--r--fs/nfsd/nfs4state.c62
-rw-r--r--fs/notify/fanotify/fanotify_user.c14
-rw-r--r--fs/notify/inotify/inotify_user.c9
-rw-r--r--fs/nsfs.c1
-rw-r--r--fs/ocfs2/cluster/tcp.c6
-rw-r--r--fs/ocfs2/filecheck.c9
-rw-r--r--fs/open.c77
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/quota/compat.c13
-rw-r--r--fs/quota/quota.c10
-rw-r--r--fs/read_write.c45
-rw-r--r--fs/readdir.c11
-rw-r--r--fs/select.c29
-rw-r--r--fs/signalfd.c31
-rw-r--r--fs/splice.c12
-rw-r--r--fs/stat.c12
-rw-r--r--fs/sync.c19
-rw-r--r--fs/sysfs/symlink.c1
-rw-r--r--fs/utimes.c25
62 files changed, 1325 insertions, 1038 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 58c2bbd385ad..57a27c42b5ac 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,6 @@
config BINFMT_ELF
bool "Kernel support for ELF binaries"
- depends on MMU && (BROKEN || !FRV)
+ depends on MMU
select ELFCORE
default y
---help---
@@ -35,7 +35,7 @@ config ARCH_BINFMT_ELF_STATE
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y if !BINFMT_ELF
- depends on (ARM || FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
+ depends on (ARM || (SUPERH32 && !MMU) || C6X)
select ELFCORE
help
ELF FDPIC binaries are based on ELF, but allow the individual load
@@ -90,7 +90,6 @@ config BINFMT_SCRIPT
config BINFMT_FLAT
bool "Kernel support for flat binaries"
depends on !MMU || ARM || M68K
- depends on !FRV || BROKEN
help
Support uClinux FLAT format binaries.
diff --git a/fs/Makefile b/fs/Makefile
index add789ea270a..c9375fd2c8c4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
ioctl.o readdir.o select.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
- pnode.o splice.o sync.o utimes.o \
+ pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
ifeq ($(CONFIG_BLOCK),y)
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 9bb921d120d0..3d2c5e0e854e 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -25,7 +25,7 @@ static void afs_manage_cell(struct work_struct *);
static void afs_dec_cells_outstanding(struct afs_net *net)
{
if (atomic_dec_and_test(&net->cells_outstanding))
- wake_up_atomic_t(&net->cells_outstanding);
+ wake_up_var(&net->cells_outstanding);
}
/*
@@ -764,7 +764,7 @@ void afs_cell_purge(struct afs_net *net)
afs_queue_cell_manager(net);
_debug("wait");
- wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&net->cells_outstanding,
+ !atomic_read(&net->cells_outstanding));
_leave("");
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f38d6a561a84..72217170b155 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -118,6 +118,7 @@ struct afs_call {
bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
u16 service_id; /* Actual service ID (after upgrade) */
+ unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e1126659f043..f7ae54b6a393 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -103,8 +103,8 @@ void afs_close_socket(struct afs_net *net)
}
_debug("outstanding %u", atomic_read(&net->nr_outstanding_calls));
- wait_on_atomic_t(&net->nr_outstanding_calls, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&net->nr_outstanding_calls,
+ !atomic_read(&net->nr_outstanding_calls));
_debug("no outstanding calls");
kernel_sock_shutdown(net->socket, SHUT_RDWR);
@@ -131,6 +131,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->type = type;
call->net = net;
+ call->debug_id = atomic_inc_return(&rxrpc_debug_id);
atomic_set(&call->usage, 1);
INIT_WORK(&call->async_work, afs_process_async_call);
init_waitqueue_head(&call->waitq);
@@ -169,13 +170,14 @@ void afs_put_call(struct afs_call *call)
afs_put_server(call->net, call->cm_server);
afs_put_cb_interest(call->net, call->cbi);
kfree(call->request);
- kfree(call);
- o = atomic_dec_return(&net->nr_outstanding_calls);
trace_afs_call(call, afs_call_trace_free, 0, o,
__builtin_return_address(0));
+ kfree(call);
+
+ o = atomic_dec_return(&net->nr_outstanding_calls);
if (o == 0)
- wake_up_atomic_t(&net->nr_outstanding_calls);
+ wake_up_var(&net->nr_outstanding_calls);
}
}
@@ -378,7 +380,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
(async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter),
- call->upgrade);
+ call->upgrade,
+ call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
goto error_kill_call;
@@ -727,7 +730,8 @@ void afs_charge_preallocation(struct work_struct *work)
afs_wake_up_async_call,
afs_rx_attach,
(unsigned long)call,
- GFP_KERNEL) < 0)
+ GFP_KERNEL,
+ call->debug_id) < 0)
break;
call = NULL;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 1880f1b6a9f1..a43ef77dabae 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -25,7 +25,7 @@ static void afs_inc_servers_outstanding(struct afs_net *net)
static void afs_dec_servers_outstanding(struct afs_net *net)
{
if (atomic_dec_and_test(&net->servers_outstanding))
- wake_up_atomic_t(&net->servers_outstanding);
+ wake_up_var(&net->servers_outstanding);
}
/*
@@ -521,8 +521,8 @@ void afs_purge_servers(struct afs_net *net)
afs_queue_server_manager(net);
_debug("wait");
- wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&net->servers_outstanding,
+ !atomic_read(&net->servers_outstanding));
_leave("");
}
diff --git a/fs/aio.c b/fs/aio.c
index a062d75109cb..88d7927ffbc6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
#define AIO_RING_PAGES 8
struct kioctx_table {
- struct rcu_head rcu;
- unsigned nr;
- struct kioctx *table[];
+ struct rcu_head rcu;
+ unsigned nr;
+ struct kioctx __rcu *table[];
};
struct kioctx_cpu {
@@ -115,7 +115,7 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct work_struct free_work;
+ struct rcu_work free_rwork; /* see free_ioctx() */
/*
* signals when all in-flight requests are done
@@ -329,7 +329,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
- ctx = table->table[i];
+ ctx = rcu_dereference(table->table[i]);
if (ctx && ctx->aio_ring_file == file) {
if (!atomic_read(&ctx->dead)) {
ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -588,10 +588,15 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
return cancel(&kiocb->common);
}
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(). Use rcu_work.
+ */
static void free_ioctx(struct work_struct *work)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-
+ struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
+ free_rwork);
pr_debug("freeing %p\n", ctx);
aio_free_ring(ctx);
@@ -609,8 +614,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
complete(&ctx->rq_wait->comp);
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ /* Synchronize against RCU protected table->table[] dereferences */
+ INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
+ queue_rcu_work(system_wq, &ctx->free_rwork);
}
/*
@@ -651,9 +657,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
while (1) {
if (table)
for (i = 0; i < table->nr; i++)
- if (!table->table[i]) {
+ if (!rcu_access_pointer(table->table[i])) {
ctx->id = i;
- table->table[i] = ctx;
+ rcu_assign_pointer(table->table[i], ctx);
spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress,
@@ -834,11 +840,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
}
table = rcu_dereference_raw(mm->ioctx_table);
- WARN_ON(ctx != table->table[ctx->id]);
- table->table[ctx->id] = NULL;
+ WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+ RCU_INIT_POINTER(table->table[ctx->id], NULL);
spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */
+ /* free_ioctx_reqs() will do the necessary RCU synchronization */
wake_up_all(&ctx->wait);
/*
@@ -880,7 +886,8 @@ void exit_aio(struct mm_struct *mm)
skipped = 0;
for (i = 0; i < table->nr; ++i) {
- struct kioctx *ctx = table->table[i];
+ struct kioctx *ctx =
+ rcu_dereference_protected(table->table[i], true);
if (!ctx) {
skipped++;
@@ -1069,7 +1076,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
- ctx = table->table[id];
+ ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
percpu_ref_get(&ctx->users);
ret = ctx;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index b7c816f39404..26f6b4f41ce6 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -310,7 +310,7 @@ static int autofs_dev_ioctl_closemount(struct file *fp,
struct autofs_sb_info *sbi,
struct autofs_dev_ioctl *param)
{
- return sys_close(param->ioctlfd);
+ return ksys_close(param->ioctlfd);
}
/*
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a7c5a9861bef..a41b48f82a70 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -241,7 +241,7 @@ ret:
return retval;
error:
if (fd_binary > 0)
- sys_close(fd_binary);
+ ksys_close(fd_binary);
bprm->interp_flags = 0;
bprm->interp_data = 0;
goto ret;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f94b2d8c744a..26484648d090 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1519,6 +1519,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
if (!node)
break;
bytenr = node->val;
+ shared.share_count = 0;
cond_resched();
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c1618ab9fecf..e0460d7b5622 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3990,7 +3990,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
bg = btrfs_lookup_block_group(fs_info, bytenr);
ASSERT(bg);
if (atomic_dec_and_test(&bg->nocow_writers))
- wake_up_atomic_t(&bg->nocow_writers);
+ wake_up_var(&bg->nocow_writers);
/*
* Once for our lookup and once for the lookup done by a previous call
* to btrfs_inc_nocow_writers()
@@ -4001,8 +4001,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
{
- wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
}
static const char *alloc_name(u64 flags)
@@ -6526,7 +6525,7 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
bg = btrfs_lookup_block_group(fs_info, start);
ASSERT(bg);
if (atomic_dec_and_test(&bg->reservations))
- wake_up_atomic_t(&bg->reservations);
+ wake_up_var(&bg->reservations);
btrfs_put_block_group(bg);
}
@@ -6552,8 +6551,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
down_write(&space_info->groups_sem);
up_write(&space_info->groups_sem);
- wait_on_atomic_t(&bg->reservations, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
}
/**
@@ -11061,7 +11059,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
ret = btrfs_start_write_no_snapshotting(root);
if (ret)
break;
- wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&root->will_be_snapshotted,
+ !atomic_read(&root->will_be_snapshotted));
}
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 111ee282b777..3278ae592a2c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -723,7 +723,7 @@ fail:
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
if (atomic_dec_and_test(&root->will_be_snapshotted))
- wake_up_atomic_t(&root->will_be_snapshotted);
+ wake_up_var(&root->will_be_snapshotted);
free_pending:
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index dec0907dfb8a..fcfc20de2df3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1370,6 +1370,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
+ stripe->dev->bdev &&
bio->bi_disk == stripe->dev->bdev->bd_disk &&
bio->bi_partno == stripe->dev->bdev->bd_partno) {
return i;
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index d11c70bff5a9..a8bafed931f4 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -423,7 +423,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->nodesize);
+ return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
}
BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
@@ -433,7 +433,8 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize);
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ fs_info->super_copy->sectorsize);
}
BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
@@ -443,7 +444,8 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize);
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ fs_info->super_copy->sectorsize);
}
BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9220f004001c..04f07144b45c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1722,23 +1722,19 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
super = fs_info->super_copy;
- /* update latest btrfs_super_block::chunk_root refs */
root_item = &fs_info->chunk_root->root_item;
- btrfs_set_super_chunk_root(super, root_item->bytenr);
- btrfs_set_super_chunk_root_generation(super, root_item->generation);
- btrfs_set_super_chunk_root_level(super, root_item->level);
+ super->chunk_root = root_item->bytenr;
+ super->chunk_root_generation = root_item->generation;
+ super->chunk_root_level = root_item->level;
- /* update latest btrfs_super_block::root refs */
root_item = &fs_info->tree_root->root_item;
- btrfs_set_super_root(super, root_item->bytenr);
- btrfs_set_super_generation(super, root_item->generation);
- btrfs_set_super_root_level(super, root_item->level);
-
+ super->root = root_item->bytenr;
+ super->generation = root_item->generation;
+ super->root_level = root_item->level;
if (btrfs_test_opt(fs_info, SPACE_CACHE))
- btrfs_set_super_cache_generation(super, root_item->generation);
+ super->cache_generation = root_item->generation;
if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
- btrfs_set_super_uuid_tree_generation(super,
- root_item->generation);
+ super->uuid_tree_generation = root_item->generation;
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6639926eed4e..b67eec3532a1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -640,7 +640,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
struct ceph_aio_request {
struct kiocb *iocb;
size_t total_len;
- int write;
+ bool write;
+ bool should_dirty;
int error;
struct list_head osd_reqs;
unsigned num_reqs;
@@ -750,7 +751,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
}
}
- ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
+ ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
ceph_osdc_put_request(req);
if (rc < 0)
@@ -847,6 +848,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos;
bool write = iov_iter_rw(iter) == WRITE;
+ bool should_dirty = !write && iter_is_iovec(iter);
if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
@@ -914,6 +916,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (aio_req) {
aio_req->iocb = iocb;
aio_req->write = write;
+ aio_req->should_dirty = should_dirty;
INIT_LIST_HEAD(&aio_req->osd_reqs);
if (write) {
aio_req->mtime = mtime;
@@ -971,7 +974,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
len = ret;
}
- ceph_put_page_vector(pages, num_pages, !write);
+ ceph_put_page_vector(pages, num_pages, should_dirty);
ceph_osdc_put_request(req);
if (ret < 0)
diff --git a/fs/d_path.c b/fs/d_path.c
new file mode 100644
index 000000000000..e8fce6b1174f
--- /dev/null
+++ b/fs/d_path.c
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/fs_struct.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
+#include "mount.h"
+
+static int prepend(char **buffer, int *buflen, const char *str, int namelen)
+{
+ *buflen -= namelen;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ *buffer -= namelen;
+ memcpy(*buffer, str, namelen);
+ return 0;
+}
+
+/**
+ * prepend_name - prepend a pathname in front of current buffer pointer
+ * @buffer: buffer pointer
+ * @buflen: allocated length of the buffer
+ * @name: name string and length qstr structure
+ *
+ * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
+ * make sure that either the old or the new name pointer and length are
+ * fetched. However, there may be mismatch between length and pointer.
+ * The length cannot be trusted, we need to copy it byte-by-byte until
+ * the length is reached or a null byte is found. It also prepends "/" at
+ * the beginning of the name. The sequence number check at the caller will
+ * retry it again when a d_move() does happen. So any garbage in the buffer
+ * due to mismatched pointer and length will be discarded.
+ *
+ * Load acquire is needed to make sure that we see that terminating NUL.
+ */
+static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
+{
+ const char *dname = smp_load_acquire(&name->name); /* ^^^ */
+ u32 dlen = READ_ONCE(name->len);
+ char *p;
+
+ *buflen -= dlen + 1;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ p = *buffer -= dlen + 1;
+ *p++ = '/';
+ while (dlen--) {
+ char c = *dname++;
+ if (!c)
+ break;
+ *p++ = c;
+ }
+ return 0;
+}
+
+/**
+ * prepend_path - Prepend path string to a buffer
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buffer: pointer to the end of the buffer
+ * @buflen: pointer to buffer length
+ *
+ * The function will first try to write out the pathname without taking any
+ * lock other than the RCU read lock to make sure that dentries won't go away.
+ * It only checks the sequence number of the global rename_lock as any change
+ * in the dentry's d_seq will be preceded by changes in the rename_lock
+ * sequence number. If the sequence number had been changed, it will restart
+ * the whole pathname back-tracing sequence again by taking the rename_lock.
+ * In this case, there is no need to take the RCU read lock as the recursive
+ * parent pointer references will keep the dentry chain alive as long as no
+ * rename operation is performed.
+ */
+static int prepend_path(const struct path *path,
+ const struct path *root,
+ char **buffer, int *buflen)
+{
+ struct dentry *dentry;
+ struct vfsmount *vfsmnt;
+ struct mount *mnt;
+ int error = 0;
+ unsigned seq, m_seq = 0;
+ char *bptr;
+ int blen;
+
+ rcu_read_lock();
+restart_mnt:
+ read_seqbegin_or_lock(&mount_lock, &m_seq);
+ seq = 0;
+ rcu_read_lock();
+restart:
+ bptr = *buffer;
+ blen = *buflen;
+ error = 0;
+ dentry = path->dentry;
+ vfsmnt = path->mnt;
+ mnt = real_mount(vfsmnt);
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (dentry != root->dentry || vfsmnt != root->mnt) {
+ struct dentry * parent;
+
+ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+ struct mount *parent = READ_ONCE(mnt->mnt_parent);
+ /* Escaped? */
+ if (dentry != vfsmnt->mnt_root) {
+ bptr = *buffer;
+ blen = *buflen;
+ error = 3;
+ break;
+ }
+ /* Global root? */
+ if (mnt != parent) {
+ dentry = READ_ONCE(mnt->mnt_mountpoint);
+ mnt = parent;
+ vfsmnt = &mnt->mnt;
+ continue;
+ }
+ if (!error)
+ error = is_mounted(vfsmnt) ? 1 : 2;
+ break;
+ }
+ parent = dentry->d_parent;
+ prefetch(parent);
+ error = prepend_name(&bptr, &blen, &dentry->d_name);
+ if (error)
+ break;
+
+ dentry = parent;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+
+ if (!(m_seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&mount_lock, m_seq)) {
+ m_seq = 1;
+ goto restart_mnt;
+ }
+ done_seqretry(&mount_lock, m_seq);
+
+ if (error >= 0 && bptr == *buffer) {
+ if (--blen < 0)
+ error = -ENAMETOOLONG;
+ else
+ *--bptr = '/';
+ }
+ *buffer = bptr;
+ *buflen = blen;
+ return error;
+}
+
+/**
+ * __d_path - return the path of a dentry
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name.
+ *
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
+ *
+ * "buflen" should be positive.
+ *
+ * If the path is not reachable from the supplied root, return %NULL.
+ */
+char *__d_path(const struct path *path,
+ const struct path *root,
+ char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ int error;
+
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, root, &res, &buflen);
+
+ if (error < 0)
+ return ERR_PTR(error);
+ if (error > 0)
+ return NULL;
+ return res;
+}
+
+char *d_absolute_path(const struct path *path,
+ char *buf, int buflen)
+{
+ struct path root = {};
+ char *res = buf + buflen;
+ int error;
+
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, &root, &res, &buflen);
+
+ if (error > 1)
+ error = -EINVAL;
+ if (error < 0)
+ return ERR_PTR(error);
+ return res;
+}
+
+/*
+ * same as __d_path but appends "(deleted)" for unlinked files.
+ */
+static int path_with_deleted(const struct path *path,
+ const struct path *root,
+ char **buf, int *buflen)
+{
+ prepend(buf, buflen, "\0", 1);
+ if (d_unlinked(path->dentry)) {
+ int error = prepend(buf, buflen, " (deleted)", 10);
+ if (error)
+ return error;
+ }
+
+ return prepend_path(path, root, buf, buflen);
+}
+
+static int prepend_unreachable(char **buffer, int *buflen)
+{
+ return prepend(buffer, buflen, "(unreachable)", 13);
+}
+
+static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/**
+ * d_path - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * the string " (deleted)" is appended. Note that this is ambiguous.
+ *
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: Callers should use the returned pointer, not the passed
+ * in buffer, to use the name! The implementation often starts at an offset
+ * into the buffer, and may leave 0 bytes at the start.
+ *
+ * "buflen" should be positive.
+ */
+char *d_path(const struct path *path, char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ struct path root;
+ int error;
+
+ /*
+ * We have various synthetic filesystems that never get mounted. On
+ * these filesystems dentries are never used for lookup purposes, and
+ * thus don't need to be hashed. They also don't need a name until a
+ * user wants to identify the object in /proc/pid/fd/. The little hack
+ * below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
+ */
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
+ return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+
+ rcu_read_lock();
+ get_fs_root_rcu(current->fs, &root);
+ error = path_with_deleted(path, &root, &res, &buflen);
+ rcu_read_unlock();
+
+ if (error < 0)
+ res = ERR_PTR(error);
+ return res;
+}
+EXPORT_SYMBOL(d_path);
+
+/*
+ * Helper function for dentry_operations.d_dname() members
+ */
+char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
+ const char *fmt, ...)
+{
+ va_list args;
+ char temp[64];
+ int sz;
+
+ va_start(args, fmt);
+ sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
+ va_end(args);
+
+ if (sz > sizeof(temp) || sz > buflen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ buffer += buflen - sz;
+ return memcpy(buffer, temp, sz);
+}
+
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ char *end = buffer + buflen;
+ /* these dentries are never renamed, so d_lock is not needed */
+ if (prepend(&end, &buflen, " (deleted)", 11) ||
+ prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
+ prepend(&end, &buflen, "/", 1))
+ end = ERR_PTR(-ENAMETOOLONG);
+ return end;
+}
+EXPORT_SYMBOL(simple_dname);
+
+/*
+ * Write full pathname from the root of the filesystem into the buffer.
+ */
+static char *__dentry_path(struct dentry *d, char *buf, int buflen)
+{
+ struct dentry *dentry;
+ char *end, *retval;
+ int len, seq = 0;
+ int error = 0;
+
+ if (buflen < 2)
+ goto Elong;
+
+ rcu_read_lock();
+restart:
+ dentry = d;
+ end = buf + buflen;
+ len = buflen;
+ prepend(&end, &len, "\0", 1);
+ /* Get '/' right */
+ retval = end-1;
+ *retval = '/';
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (!IS_ROOT(dentry)) {
+ struct dentry *parent = dentry->d_parent;
+
+ prefetch(parent);
+ error = prepend_name(&end, &len, &dentry->d_name);
+ if (error)
+ break;
+
+ retval = end;
+ dentry = parent;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+ if (error)
+ goto Elong;
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+{
+ return __dentry_path(dentry, buf, buflen);
+}
+EXPORT_SYMBOL(dentry_path_raw);
+
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+ char *p = NULL;
+ char *retval;
+
+ if (d_unlinked(dentry)) {
+ p = buf + buflen;
+ if (prepend(&p, &buflen, "//deleted", 10) != 0)
+ goto Elong;
+ buflen++;
+ }
+ retval = __dentry_path(dentry, buf, buflen);
+ if (!IS_ERR(retval) && p)
+ *p = '/'; /* restore '/' overriden with '\0' */
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
+ struct path *pwd)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ *pwd = fs->pwd;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/*
+ * NOTE! The user-level library version returns a
+ * character pointer. The kernel system call just
+ * returns the length of the buffer filled (which
+ * includes the ending '\0' character), or a negative
+ * error value. So libc would do something like
+ *
+ * char *getcwd(char * buf, size_t size)
+ * {
+ * int retval;
+ *
+ * retval = sys_getcwd(buf, size);
+ * if (retval >= 0)
+ * return buf;
+ * errno = -retval;
+ * return NULL;
+ * }
+ */
+SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
+{
+ int error;
+ struct path pwd, root;
+ char *page = __getname();
+
+ if (!page)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
+
+ error = -ENOENT;
+ if (!d_unlinked(pwd.dentry)) {
+ unsigned long len;
+ char *cwd = page + PATH_MAX;
+ int buflen = PATH_MAX;
+
+ prepend(&cwd, &buflen, "\0", 1);
+ error = prepend_path(&pwd, &root, &cwd, &buflen);
+ rcu_read_unlock();
+
+ if (error < 0)
+ goto out;
+
+ /* Unreachable from current root */
+ if (error > 0) {
+ error = prepend_unreachable(&cwd, &buflen);
+ if (error)
+ goto out;
+ }
+
+ error = -ERANGE;
+ len = PATH_MAX + page - cwd;
+ if (len <= size) {
+ error = len;
+ if (copy_to_user(buf, cwd, len))
+ error = -EFAULT;
+ }
+ } else {
+ rcu_read_unlock();
+ }
+
+out:
+ __putname(page);
+ return error;
+}
diff --git a/fs/dcache.c b/fs/dcache.c
index 7c38f39958bc..593079176123 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -14,7 +14,7 @@
* the dcache entry is deleted or garbage collected.
*/
-#include <linux/syscalls.h>
+#include <linux/ratelimit.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
@@ -24,18 +24,11 @@
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/export.h>
-#include <linux/mount.h>
-#include <linux/file.h>
-#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
-#include <linux/swap.h>
#include <linux/bootmem.h>
-#include <linux/fs_struct.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
-#include <linux/prefetch.h>
-#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include "internal.h"
#include "mount.h"
@@ -74,9 +67,7 @@
* dentry->d_lock
*
* If no ancestor relationship:
- * if (dentry1 < dentry2)
- * dentry1->d_lock
- * dentry2->d_lock
+ * arbitrary, since it's serialized on rename_lock
*/
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
@@ -440,17 +431,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
list_lru_isolate_move(lru, &dentry->d_lru, list);
}
-/*
- * dentry_lru_(add|del)_list) must be called with d_lock held.
- */
-static void dentry_lru_add(struct dentry *dentry)
-{
- if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
- d_lru_add(dentry);
- else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
- dentry->d_flags |= DCACHE_REFERENCED;
-}
-
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
@@ -470,30 +450,29 @@ static void dentry_lru_add(struct dentry *dentry)
*/
static void ___d_drop(struct dentry *dentry)
{
- if (!d_unhashed(dentry)) {
- struct hlist_bl_head *b;
- /*
- * Hashed dentries are normally on the dentry hashtable,
- * with the exception of those newly allocated by
- * d_obtain_root, which are always IS_ROOT:
- */
- if (unlikely(IS_ROOT(dentry)))
- b = &dentry->d_sb->s_roots;
- else
- b = d_hash(dentry->d_name.hash);
+ struct hlist_bl_head *b;
+ /*
+ * Hashed dentries are normally on the dentry hashtable,
+ * with the exception of those newly allocated by
+ * d_obtain_root, which are always IS_ROOT:
+ */
+ if (unlikely(IS_ROOT(dentry)))
+ b = &dentry->d_sb->s_roots;
+ else
+ b = d_hash(dentry->d_name.hash);
- hlist_bl_lock(b);
- __hlist_bl_del(&dentry->d_hash);
- hlist_bl_unlock(b);
- /* After this call, in-progress rcu-walk path lookup will fail. */
- write_seqcount_invalidate(&dentry->d_seq);
- }
+ hlist_bl_lock(b);
+ __hlist_bl_del(&dentry->d_hash);
+ hlist_bl_unlock(b);
}
void __d_drop(struct dentry *dentry)
{
- ___d_drop(dentry);
- dentry->d_hash.pprev = NULL;
+ if (!d_unhashed(dentry)) {
+ ___d_drop(dentry);
+ dentry->d_hash.pprev = NULL;
+ write_seqcount_invalidate(&dentry->d_seq);
+ }
}
EXPORT_SYMBOL(__d_drop);
@@ -589,47 +568,9 @@ static void __dentry_kill(struct dentry *dentry)
dentry_free(dentry);
}
-/*
- * Finish off a dentry we've decided to kill.
- * dentry->d_lock must be held, returns with it unlocked.
- * If ref is non-zero, then decrement the refcount too.
- * Returns dentry requiring refcount drop, or NULL if we're done.
- */
-static struct dentry *dentry_kill(struct dentry *dentry)
- __releases(dentry->d_lock)
-{
- struct inode *inode = dentry->d_inode;
- struct dentry *parent = NULL;
-
- if (inode && unlikely(!spin_trylock(&inode->i_lock)))
- goto failed;
-
- if (!IS_ROOT(dentry)) {
- parent = dentry->d_parent;
- if (unlikely(!spin_trylock(&parent->d_lock))) {
- if (inode)
- spin_unlock(&inode->i_lock);
- goto failed;
- }
- }
-
- __dentry_kill(dentry);
- return parent;
-
-failed:
- spin_unlock(&dentry->d_lock);
- return dentry; /* try again with same dentry */
-}
-
-static inline struct dentry *lock_parent(struct dentry *dentry)
+static struct dentry *__lock_parent(struct dentry *dentry)
{
- struct dentry *parent = dentry->d_parent;
- if (IS_ROOT(dentry))
- return NULL;
- if (unlikely(dentry->d_lockref.count < 0))
- return NULL;
- if (likely(spin_trylock(&parent->d_lock)))
- return parent;
+ struct dentry *parent;
rcu_read_lock();
spin_unlock(&dentry->d_lock);
again:
@@ -655,6 +596,91 @@ again:
return parent;
}
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+ struct dentry *parent = dentry->d_parent;
+ if (IS_ROOT(dentry))
+ return NULL;
+ if (likely(spin_trylock(&parent->d_lock)))
+ return parent;
+ return __lock_parent(dentry);
+}
+
+static inline bool retain_dentry(struct dentry *dentry)
+{
+ WARN_ON(d_in_lookup(dentry));
+
+ /* Unreachable? Get rid of it */
+ if (unlikely(d_unhashed(dentry)))
+ return false;
+
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+ return false;
+
+ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
+ if (dentry->d_op->d_delete(dentry))
+ return false;
+ }
+ /* retain; LRU fodder */
+ dentry->d_lockref.count--;
+ if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
+ d_lru_add(dentry);
+ else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
+ dentry->d_flags |= DCACHE_REFERENCED;
+ return true;
+}
+
+/*
+ * Finish off a dentry we've decided to kill.
+ * dentry->d_lock must be held, returns with it unlocked.
+ * Returns dentry requiring refcount drop, or NULL if we're done.
+ */
+static struct dentry *dentry_kill(struct dentry *dentry)
+ __releases(dentry->d_lock)
+{
+ struct inode *inode = dentry->d_inode;
+ struct dentry *parent = NULL;
+
+ if (inode && unlikely(!spin_trylock(&inode->i_lock)))
+ goto slow_positive;
+
+ if (!IS_ROOT(dentry)) {
+ parent = dentry->d_parent;
+ if (unlikely(!spin_trylock(&parent->d_lock))) {
+ parent = __lock_parent(dentry);
+ if (likely(inode || !dentry->d_inode))
+ goto got_locks;
+ /* negative that became positive */
+ if (parent)
+ spin_unlock(&parent->d_lock);
+ inode = dentry->d_inode;
+ goto slow_positive;
+ }
+ }
+ __dentry_kill(dentry);
+ return parent;
+
+slow_positive:
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
+ parent = lock_parent(dentry);
+got_locks:
+ if (unlikely(dentry->d_lockref.count != 1)) {
+ dentry->d_lockref.count--;
+ } else if (likely(!retain_dentry(dentry))) {
+ __dentry_kill(dentry);
+ return parent;
+ }
+ /* we are keeping it, after all */
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ if (parent)
+ spin_unlock(&parent->d_lock);
+ spin_unlock(&dentry->d_lock);
+ return NULL;
+}
+
/*
* Try to do a lockless dput(), and return whether that was successful.
*
@@ -802,27 +828,11 @@ repeat:
/* Slow case: now with the dentry lock held */
rcu_read_unlock();
- WARN_ON(d_in_lookup(dentry));
-
- /* Unreachable? Get rid of it */
- if (unlikely(d_unhashed(dentry)))
- goto kill_it;
-
- if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
- goto kill_it;
-
- if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
- if (dentry->d_op->d_delete(dentry))
- goto kill_it;
+ if (likely(retain_dentry(dentry))) {
+ spin_unlock(&dentry->d_lock);
+ return;
}
- dentry_lru_add(dentry);
-
- dentry->d_lockref.count--;
- spin_unlock(&dentry->d_lock);
- return;
-
-kill_it:
dentry = dentry_kill(dentry);
if (dentry) {
cond_resched();
@@ -971,56 +981,83 @@ restart:
}
EXPORT_SYMBOL(d_prune_aliases);
-static void shrink_dentry_list(struct list_head *list)
+/*
+ * Lock a dentry from shrink list.
+ * Called under rcu_read_lock() and dentry->d_lock; the former
+ * guarantees that nothing we access will be freed under us.
+ * Note that dentry is *not* protected from concurrent dentry_kill(),
+ * d_delete(), etc.
+ *
+ * Return false if dentry has been disrupted or grabbed, leaving
+ * the caller to kick it off-list. Otherwise, return true and have
+ * that dentry's inode and parent both locked.
+ */
+static bool shrink_lock_dentry(struct dentry *dentry)
{
- struct dentry *dentry, *parent;
+ struct inode *inode;
+ struct dentry *parent;
- while (!list_empty(list)) {
- struct inode *inode;
- dentry = list_entry(list->prev, struct dentry, d_lru);
+ if (dentry->d_lockref.count)
+ return false;
+
+ inode = dentry->d_inode;
+ if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&inode->i_lock);
spin_lock(&dentry->d_lock);
- parent = lock_parent(dentry);
+ if (unlikely(dentry->d_lockref.count))
+ goto out;
+ /* changed inode means that somebody had grabbed it */
+ if (unlikely(inode != dentry->d_inode))
+ goto out;
+ }
- /*
- * The dispose list is isolated and dentries are not accounted
- * to the LRU here, so we can simply remove it from the list
- * here regardless of whether it is referenced or not.
- */
- d_shrink_del(dentry);
+ parent = dentry->d_parent;
+ if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
+ return true;
- /*
- * We found an inuse dentry which was not removed from
- * the LRU because of laziness during lookup. Do not free it.
- */
- if (dentry->d_lockref.count > 0) {
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- continue;
- }
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&parent->d_lock);
+ if (unlikely(parent != dentry->d_parent)) {
+ spin_unlock(&parent->d_lock);
+ spin_lock(&dentry->d_lock);
+ goto out;
+ }
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (likely(!dentry->d_lockref.count))
+ return true;
+ spin_unlock(&parent->d_lock);
+out:
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ return false;
+}
+static void shrink_dentry_list(struct list_head *list)
+{
+ while (!list_empty(list)) {
+ struct dentry *dentry, *parent;
- if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
- bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
+ dentry = list_entry(list->prev, struct dentry, d_lru);
+ spin_lock(&dentry->d_lock);
+ rcu_read_lock();
+ if (!shrink_lock_dentry(dentry)) {
+ bool can_free = false;
+ rcu_read_unlock();
+ d_shrink_del(dentry);
+ if (dentry->d_lockref.count < 0)
+ can_free = dentry->d_flags & DCACHE_MAY_FREE;
spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
if (can_free)
dentry_free(dentry);
continue;
}
-
- inode = dentry->d_inode;
- if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
- d_shrink_add(dentry, list);
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- continue;
- }
-
+ rcu_read_unlock();
+ d_shrink_del(dentry);
+ parent = dentry->d_parent;
__dentry_kill(dentry);
-
+ if (parent == dentry)
+ continue;
/*
* We need to prune ancestors too. This is necessary to prevent
* quadratic behavior of shrink_dcache_parent(), but is also
@@ -1028,26 +1065,8 @@ static void shrink_dentry_list(struct list_head *list)
* fragmentation.
*/
dentry = parent;
- while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
- parent = lock_parent(dentry);
- if (dentry->d_lockref.count != 1) {
- dentry->d_lockref.count--;
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- break;
- }
- inode = dentry->d_inode; /* can't be NULL */
- if (unlikely(!spin_trylock(&inode->i_lock))) {
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- cpu_relax();
- continue;
- }
- __dentry_kill(dentry);
- dentry = parent;
- }
+ while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
+ dentry = dentry_kill(dentry);
}
}
@@ -2374,32 +2393,22 @@ EXPORT_SYMBOL(d_hash_and_lookup);
void d_delete(struct dentry * dentry)
{
- struct inode *inode;
- int isdir = 0;
+ struct inode *inode = dentry->d_inode;
+ int isdir = d_is_dir(dentry);
+
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
/*
* Are we the only user?
*/
-again:
- spin_lock(&dentry->d_lock);
- inode = dentry->d_inode;
- isdir = S_ISDIR(inode->i_mode);
if (dentry->d_lockref.count == 1) {
- if (!spin_trylock(&inode->i_lock)) {
- spin_unlock(&dentry->d_lock);
- cpu_relax();
- goto again;
- }
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
dentry_unlink_inode(dentry);
- fsnotify_nameremove(dentry, isdir);
- return;
- }
-
- if (!d_unhashed(dentry))
+ } else {
__d_drop(dentry);
-
- spin_unlock(&dentry->d_lock);
-
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&inode->i_lock);
+ }
fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
@@ -2474,7 +2483,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
retry:
rcu_read_lock();
- seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
+ seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
r_seq = read_seqbegin(&rename_lock);
dentry = __d_lookup_rcu(parent, name, &d_seq);
if (unlikely(dentry)) {
@@ -2495,8 +2504,14 @@ retry:
rcu_read_unlock();
goto retry;
}
+
+ if (unlikely(seq & 1)) {
+ rcu_read_unlock();
+ goto retry;
+ }
+
hlist_bl_lock(b);
- if (unlikely(parent->d_inode->i_dir_seq != seq)) {
+ if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
hlist_bl_unlock(b);
rcu_read_unlock();
goto retry;
@@ -2758,57 +2773,6 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
kfree_rcu(old_name, u.head);
}
-static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
-{
- /*
- * XXXX: do we really need to take target->d_lock?
- */
- if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
- spin_lock(&target->d_parent->d_lock);
- else {
- if (d_ancestor(dentry->d_parent, target->d_parent)) {
- spin_lock(&dentry->d_parent->d_lock);
- spin_lock_nested(&target->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- } else {
- spin_lock(&target->d_parent->d_lock);
- spin_lock_nested(&dentry->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- }
- }
- if (target < dentry) {
- spin_lock_nested(&target->d_lock, 2);
- spin_lock_nested(&dentry->d_lock, 3);
- } else {
- spin_lock_nested(&dentry->d_lock, 2);
- spin_lock_nested(&target->d_lock, 3);
- }
-}
-
-static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
-{
- if (target->d_parent != dentry->d_parent)
- spin_unlock(&dentry->d_parent->d_lock);
- if (target->d_parent != target)
- spin_unlock(&target->d_parent->d_lock);
- spin_unlock(&target->d_lock);
- spin_unlock(&dentry->d_lock);
-}
-
-/*
- * When switching names, the actual string doesn't strictly have to
- * be preserved in the target - because we're dropping the target
- * anyway. As such, we can just do a simple memcpy() to copy over
- * the new name before we switch, unless we are going to rehash
- * it. Note that if we *do* unhash the target, we are not allowed
- * to rehash it without giving it a new name/hash key - whether
- * we swap or overwrite the names here, resulting name won't match
- * the reality in filesystem; it's only there for d_path() purposes.
- * Note that all of this is happening under rename_lock, so the
- * any hash lookup seeing it in the middle of manipulations will
- * be discarded anyway. So we do not care what happens to the hash
- * key in that case.
- */
/*
* __d_move - move a dentry
* @dentry: entry to move
@@ -2823,15 +2787,34 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
{
+ struct dentry *old_parent, *p;
struct inode *dir = NULL;
unsigned n;
- if (!dentry->d_inode)
- printk(KERN_WARNING "VFS: moving negative dcache entry\n");
- BUG_ON(d_ancestor(dentry, target));
+ WARN_ON(!dentry->d_inode);
+ if (WARN_ON(dentry == target))
+ return;
+
BUG_ON(d_ancestor(target, dentry));
+ old_parent = dentry->d_parent;
+ p = d_ancestor(old_parent, target);
+ if (IS_ROOT(dentry)) {
+ BUG_ON(p);
+ spin_lock(&target->d_parent->d_lock);
+ } else if (!p) {
+ /* target is not a descendent of dentry->d_parent */
+ spin_lock(&target->d_parent->d_lock);
+ spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
+ } else {
+ BUG_ON(p == dentry);
+ spin_lock(&old_parent->d_lock);
+ if (p != target)
+ spin_lock_nested(&target->d_parent->d_lock,
+ DENTRY_D_LOCK_NESTED);
+ }
+ spin_lock_nested(&dentry->d_lock, 2);
+ spin_lock_nested(&target->d_lock, 3);
- dentry_lock_for_move(dentry, target);
if (unlikely(d_in_lookup(target))) {
dir = target->d_parent->d_inode;
n = start_dir_add(dir);
@@ -2842,47 +2825,44 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
/* unhash both */
- /* ___d_drop does write_seqcount_barrier, but they're OK to nest. */
- ___d_drop(dentry);
- ___d_drop(target);
+ if (!d_unhashed(dentry))
+ ___d_drop(dentry);
+ if (!d_unhashed(target))
+ ___d_drop(target);
- /* Switch the names.. */
- if (exchange)
- swap_names(dentry, target);
- else
+ /* ... and switch them in the tree */
+ dentry->d_parent = target->d_parent;
+ if (!exchange) {
copy_name(dentry, target);
-
- /* rehash in new place(s) */
- __d_rehash(dentry);
- if (exchange)
- __d_rehash(target);
- else
target->d_hash.pprev = NULL;
-
- /* ... and switch them in the tree */
- if (IS_ROOT(dentry)) {
- /* splicing a tree */
- dentry->d_flags |= DCACHE_RCUACCESS;
- dentry->d_parent = target->d_parent;
- target->d_parent = target;
- list_del_init(&target->d_child);
- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ dentry->d_parent->d_lockref.count++;
+ if (dentry == old_parent)
+ dentry->d_flags |= DCACHE_RCUACCESS;
+ else
+ WARN_ON(!--old_parent->d_lockref.count);
} else {
- /* swapping two dentries */
- swap(dentry->d_parent, target->d_parent);
+ target->d_parent = old_parent;
+ swap_names(dentry, target);
list_move(&target->d_child, &target->d_parent->d_subdirs);
- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
- if (exchange)
- fsnotify_update_flags(target);
- fsnotify_update_flags(dentry);
+ __d_rehash(target);
+ fsnotify_update_flags(target);
}
+ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ __d_rehash(dentry);
+ fsnotify_update_flags(dentry);
write_seqcount_end(&target->d_seq);
write_seqcount_end(&dentry->d_seq);
if (dir)
end_dir_add(dir, n);
- dentry_unlock_for_move(dentry, target);
+
+ if (dentry->d_parent != old_parent)
+ spin_unlock(&dentry->d_parent->d_lock);
+ if (dentry != old_parent)
+ spin_unlock(&old_parent->d_lock);
+ spin_unlock(&target->d_lock);
+ spin_unlock(&dentry->d_lock);
}
/*
@@ -3030,12 +3010,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
inode->i_sb->s_type->name,
inode->i_sb->s_id);
} else if (!IS_ROOT(new)) {
+ struct dentry *old_parent = dget(new->d_parent);
int err = __d_unalias(inode, dentry, new);
write_sequnlock(&rename_lock);
if (err) {
dput(new);
new = ERR_PTR(err);
}
+ dput(old_parent);
} else {
__d_move(new, dentry, false);
write_sequnlock(&rename_lock);
@@ -3050,467 +3032,6 @@ out:
}
EXPORT_SYMBOL(d_splice_alias);
-static int prepend(char **buffer, int *buflen, const char *str, int namelen)
-{
- *buflen -= namelen;
- if (*buflen < 0)
- return -ENAMETOOLONG;
- *buffer -= namelen;
- memcpy(*buffer, str, namelen);
- return 0;
-}
-
-/**
- * prepend_name - prepend a pathname in front of current buffer pointer
- * @buffer: buffer pointer
- * @buflen: allocated length of the buffer
- * @name: name string and length qstr structure
- *
- * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
- * make sure that either the old or the new name pointer and length are
- * fetched. However, there may be mismatch between length and pointer.
- * The length cannot be trusted, we need to copy it byte-by-byte until
- * the length is reached or a null byte is found. It also prepends "/" at
- * the beginning of the name. The sequence number check at the caller will
- * retry it again when a d_move() does happen. So any garbage in the buffer
- * due to mismatched pointer and length will be discarded.
- *
- * Load acquire is needed to make sure that we see that terminating NUL.
- */
-static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
-{
- const char *dname = smp_load_acquire(&name->name); /* ^^^ */
- u32 dlen = READ_ONCE(name->len);
- char *p;
-
- *buflen -= dlen + 1;
- if (*buflen < 0)
- return -ENAMETOOLONG;
- p = *buffer -= dlen + 1;
- *p++ = '/';
- while (dlen--) {
- char c = *dname++;
- if (!c)
- break;
- *p++ = c;
- }
- return 0;
-}
-
-/**
- * prepend_path - Prepend path string to a buffer
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buffer: pointer to the end of the buffer
- * @buflen: pointer to buffer length
- *
- * The function will first try to write out the pathname without taking any
- * lock other than the RCU read lock to make sure that dentries won't go away.
- * It only checks the sequence number of the global rename_lock as any change
- * in the dentry's d_seq will be preceded by changes in the rename_lock
- * sequence number. If the sequence number had been changed, it will restart
- * the whole pathname back-tracing sequence again by taking the rename_lock.
- * In this case, there is no need to take the RCU read lock as the recursive
- * parent pointer references will keep the dentry chain alive as long as no
- * rename operation is performed.
- */
-static int prepend_path(const struct path *path,
- const struct path *root,
- char **buffer, int *buflen)
-{
- struct dentry *dentry;
- struct vfsmount *vfsmnt;
- struct mount *mnt;
- int error = 0;
- unsigned seq, m_seq = 0;
- char *bptr;
- int blen;
-
- rcu_read_lock();
-restart_mnt:
- read_seqbegin_or_lock(&mount_lock, &m_seq);
- seq = 0;
- rcu_read_lock();
-restart:
- bptr = *buffer;
- blen = *buflen;
- error = 0;
- dentry = path->dentry;
- vfsmnt = path->mnt;
- mnt = real_mount(vfsmnt);
- read_seqbegin_or_lock(&rename_lock, &seq);
- while (dentry != root->dentry || vfsmnt != root->mnt) {
- struct dentry * parent;
-
- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
- struct mount *parent = READ_ONCE(mnt->mnt_parent);
- /* Escaped? */
- if (dentry != vfsmnt->mnt_root) {
- bptr = *buffer;
- blen = *buflen;
- error = 3;
- break;
- }
- /* Global root? */
- if (mnt != parent) {
- dentry = READ_ONCE(mnt->mnt_mountpoint);
- mnt = parent;
- vfsmnt = &mnt->mnt;
- continue;
- }
- if (!error)
- error = is_mounted(vfsmnt) ? 1 : 2;
- break;
- }
- parent = dentry->d_parent;
- prefetch(parent);
- error = prepend_name(&bptr, &blen, &dentry->d_name);
- if (error)
- break;
-
- dentry = parent;
- }
- if (!(seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&rename_lock, seq)) {
- seq = 1;
- goto restart;
- }
- done_seqretry(&rename_lock, seq);
-
- if (!(m_seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&mount_lock, m_seq)) {
- m_seq = 1;
- goto restart_mnt;
- }
- done_seqretry(&mount_lock, m_seq);
-
- if (error >= 0 && bptr == *buffer) {
- if (--blen < 0)
- error = -ENAMETOOLONG;
- else
- *--bptr = '/';
- }
- *buffer = bptr;
- *buflen = blen;
- return error;
-}
-
-/**
- * __d_path - return the path of a dentry
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name.
- *
- * Returns a pointer into the buffer or an error code if the
- * path was too long.
- *
- * "buflen" should be positive.
- *
- * If the path is not reachable from the supplied root, return %NULL.
- */
-char *__d_path(const struct path *path,
- const struct path *root,
- char *buf, int buflen)
-{
- char *res = buf + buflen;
- int error;
-
- prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, root, &res, &buflen);
-
- if (error < 0)
- return ERR_PTR(error);
- if (error > 0)
- return NULL;
- return res;
-}
-
-char *d_absolute_path(const struct path *path,
- char *buf, int buflen)
-{
- struct path root = {};
- char *res = buf + buflen;
- int error;
-
- prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, &root, &res, &buflen);
-
- if (error > 1)
- error = -EINVAL;
- if (error < 0)
- return ERR_PTR(error);
- return res;
-}
-
-/*
- * same as __d_path but appends "(deleted)" for unlinked files.
- */
-static int path_with_deleted(const struct path *path,
- const struct path *root,
- char **buf, int *buflen)
-{
- prepend(buf, buflen, "\0", 1);
- if (d_unlinked(path->dentry)) {
- int error = prepend(buf, buflen, " (deleted)", 10);
- if (error)
- return error;
- }
-
- return prepend_path(path, root, buf, buflen);
-}
-
-static int prepend_unreachable(char **buffer, int *buflen)
-{
- return prepend(buffer, buflen, "(unreachable)", 13);
-}
-
-static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
-{
- unsigned seq;
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- *root = fs->root;
- } while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/**
- * d_path - return the path of a dentry
- * @path: path to report
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
- * the string " (deleted)" is appended. Note that this is ambiguous.
- *
- * Returns a pointer into the buffer or an error code if the path was
- * too long. Note: Callers should use the returned pointer, not the passed
- * in buffer, to use the name! The implementation often starts at an offset
- * into the buffer, and may leave 0 bytes at the start.
- *
- * "buflen" should be positive.
- */
-char *d_path(const struct path *path, char *buf, int buflen)
-{
- char *res = buf + buflen;
- struct path root;
- int error;
-
- /*
- * We have various synthetic filesystems that never get mounted. On
- * these filesystems dentries are never used for lookup purposes, and
- * thus don't need to be hashed. They also don't need a name until a
- * user wants to identify the object in /proc/pid/fd/. The little hack
- * below allows us to generate a name for these objects on demand:
- *
- * Some pseudo inodes are mountable. When they are mounted
- * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
- * and instead have d_path return the mounted path.
- */
- if (path->dentry->d_op && path->dentry->d_op->d_dname &&
- (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
- return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
-
- rcu_read_lock();
- get_fs_root_rcu(current->fs, &root);
- error = path_with_deleted(path, &root, &res, &buflen);
- rcu_read_unlock();
-
- if (error < 0)
- res = ERR_PTR(error);
- return res;
-}
-EXPORT_SYMBOL(d_path);
-
-/*
- * Helper function for dentry_operations.d_dname() members
- */
-char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
- const char *fmt, ...)
-{
- va_list args;
- char temp[64];
- int sz;
-
- va_start(args, fmt);
- sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
- va_end(args);
-
- if (sz > sizeof(temp) || sz > buflen)
- return ERR_PTR(-ENAMETOOLONG);
-
- buffer += buflen - sz;
- return memcpy(buffer, temp, sz);
-}
-
-char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
-{
- char *end = buffer + buflen;
- /* these dentries are never renamed, so d_lock is not needed */
- if (prepend(&end, &buflen, " (deleted)", 11) ||
- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
- prepend(&end, &buflen, "/", 1))
- end = ERR_PTR(-ENAMETOOLONG);
- return end;
-}
-EXPORT_SYMBOL(simple_dname);
-
-/*
- * Write full pathname from the root of the filesystem into the buffer.
- */
-static char *__dentry_path(struct dentry *d, char *buf, int buflen)
-{
- struct dentry *dentry;
- char *end, *retval;
- int len, seq = 0;
- int error = 0;
-
- if (buflen < 2)
- goto Elong;
-
- rcu_read_lock();
-restart:
- dentry = d;
- end = buf + buflen;
- len = buflen;
- prepend(&end, &len, "\0", 1);
- /* Get '/' right */
- retval = end-1;
- *retval = '/';
- read_seqbegin_or_lock(&rename_lock, &seq);
- while (!IS_ROOT(dentry)) {
- struct dentry *parent = dentry->d_parent;
-
- prefetch(parent);
- error = prepend_name(&end, &len, &dentry->d_name);
- if (error)
- break;
-
- retval = end;
- dentry = parent;
- }
- if (!(seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&rename_lock, seq)) {
- seq = 1;
- goto restart;
- }
- done_seqretry(&rename_lock, seq);
- if (error)
- goto Elong;
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
-}
-
-char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
-{
- return __dentry_path(dentry, buf, buflen);
-}
-EXPORT_SYMBOL(dentry_path_raw);
-
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
-{
- char *p = NULL;
- char *retval;
-
- if (d_unlinked(dentry)) {
- p = buf + buflen;
- if (prepend(&p, &buflen, "//deleted", 10) != 0)
- goto Elong;
- buflen++;
- }
- retval = __dentry_path(dentry, buf, buflen);
- if (!IS_ERR(retval) && p)
- *p = '/'; /* restore '/' overriden with '\0' */
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
-}
-
-static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
- struct path *pwd)
-{
- unsigned seq;
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- *root = fs->root;
- *pwd = fs->pwd;
- } while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/*
- * NOTE! The user-level library version returns a
- * character pointer. The kernel system call just
- * returns the length of the buffer filled (which
- * includes the ending '\0' character), or a negative
- * error value. So libc would do something like
- *
- * char *getcwd(char * buf, size_t size)
- * {
- * int retval;
- *
- * retval = sys_getcwd(buf, size);
- * if (retval >= 0)
- * return buf;
- * errno = -retval;
- * return NULL;
- * }
- */
-SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
-{
- int error;
- struct path pwd, root;
- char *page = __getname();
-
- if (!page)
- return -ENOMEM;
-
- rcu_read_lock();
- get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
-
- error = -ENOENT;
- if (!d_unlinked(pwd.dentry)) {
- unsigned long len;
- char *cwd = page + PATH_MAX;
- int buflen = PATH_MAX;
-
- prepend(&cwd, &buflen, "\0", 1);
- error = prepend_path(&pwd, &root, &cwd, &buflen);
- rcu_read_unlock();
-
- if (error < 0)
- goto out;
-
- /* Unreachable from current root */
- if (error > 0) {
- error = prepend_unreachable(&cwd, &buflen);
- if (error)
- goto out;
- }
-
- error = -ERANGE;
- len = PATH_MAX + page - cwd;
- if (len <= size) {
- error = len;
- if (copy_to_user(buf, cwd, len))
- error = -EFAULT;
- }
- } else {
- rcu_read_unlock();
- }
-
-out:
- __putname(page);
- return error;
-}
-
/*
* Test whether new_dentry is a subdirectory of old_dentry.
*
@@ -3574,6 +3095,8 @@ void d_genocide(struct dentry *parent)
d_walk(parent, parent, d_genocide_kill, NULL);
}
+EXPORT_SYMBOL(d_genocide);
+
void d_tmpfile(struct dentry *dentry, struct inode *inode)
{
inode_dec_link_count(inode);
@@ -3653,8 +3176,6 @@ static void __init dcache_init(void)
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);
-EXPORT_SYMBOL(d_genocide);
-
void __init vfs_caches_init_early(void)
{
int i;
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 0d0461cf2431..57bc96435feb 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -146,7 +146,7 @@ out:
/* And here is where the userspace process can look up the cookie value
* to retrieve the path.
*/
-SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len)
+static int do_lookup_dcookie(u64 cookie64, char __user *buf, size_t len)
{
unsigned long cookie = (unsigned long)cookie64;
int err = -EINVAL;
@@ -203,13 +203,18 @@ out:
return err;
}
+SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len)
+{
+ return do_lookup_dcookie(cookie64, buf, len);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len)
{
#ifdef __BIG_ENDIAN
- return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
+ return do_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
#else
- return sys_lookup_dcookie(((u64)w1 << 32) | w0, buf, len);
+ return do_lookup_dcookie(((u64)w1 << 32) | w0, buf, len);
#endif
}
#endif
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 63a998c3f252..13b01351dd1c 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -270,10 +270,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
if (!parent)
parent = debugfs_mount->mnt_root;
- inode_lock(d_inode(parent));
- dentry = lookup_one_len(name, parent, strlen(name));
- inode_unlock(d_inode(parent));
-
+ dentry = lookup_one_len_unlocked(name, parent, strlen(name));
if (IS_ERR(dentry))
return NULL;
if (!d_really_is_positive(dentry)) {
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cff79ea0c01d..5243989a60cc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
struct sockaddr_storage saddr;
- int buflen;
void (*orig_report)(struct sock *) = NULL;
read_lock_bh(&sk->sk_callback_lock);
@@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk)
orig_report = listen_sock.sk_error_report;
if (con->sock == NULL ||
- kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
+ kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con)
/* Get the connected socket's peer */
memset(&peeraddr, 0, sizeof(peeraddr));
- if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
- &len, 2)) {
+ len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
+ if (len < 0) {
result = -ECONNABORTED;
goto accept_err;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 012f5bd46dfa..08d3bd602f73 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -380,7 +380,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
}
EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
-SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
+static int do_eventfd(unsigned int count, int flags)
{
struct eventfd_ctx *ctx;
int fd;
@@ -409,8 +409,13 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
return fd;
}
+SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
+{
+ return do_eventfd(count, flags);
+}
+
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
- return sys_eventfd2(count, 0);
+ return do_eventfd(count, 0);
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0f3494ed3ed0..602ca4285b2e 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1936,7 +1936,7 @@ static void clear_tfile_check_list(void)
/*
* Open an eventpoll file descriptor.
*/
-SYSCALL_DEFINE1(epoll_create1, int, flags)
+static int do_epoll_create(int flags)
{
int error, fd;
struct eventpoll *ep = NULL;
@@ -1979,12 +1979,17 @@ out_free_ep:
return error;
}
+SYSCALL_DEFINE1(epoll_create1, int, flags)
+{
+ return do_epoll_create(flags);
+}
+
SYSCALL_DEFINE1(epoll_create, int, size)
{
if (size <= 0)
return -EINVAL;
- return sys_epoll_create1(0);
+ return do_epoll_create(0);
}
/*
@@ -2148,8 +2153,8 @@ error_return:
* Implement the event wait interface for the eventpoll file. It is the kernel
* part of the user space epoll_wait(2).
*/
-SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
- int, maxevents, int, timeout)
+static int do_epoll_wait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout)
{
int error;
struct fd f;
@@ -2190,6 +2195,12 @@ error_fput:
return error;
}
+SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
+ int, maxevents, int, timeout)
+{
+ return do_epoll_wait(epfd, events, maxevents, timeout);
+}
+
/*
* Implement the event wait interface for the eventpoll file. It is the kernel
* part of the user space epoll_pwait(2).
@@ -2214,7 +2225,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
set_current_blocked(&ksigmask);
}
- error = sys_epoll_wait(epfd, events, maxevents, timeout);
+ error = do_epoll_wait(epfd, events, maxevents, timeout);
/*
* If we changed the signal mask, we need to restore the original one.
@@ -2257,7 +2268,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
set_current_blocked(&ksigmask);
}
- err = sys_epoll_wait(epfd, events, maxevents, timeout);
+ err = do_epoll_wait(epfd, events, maxevents, timeout);
/*
* If we changed the signal mask, we need to restore the original one.
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 1e97f1fda90c..d737ff082472 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -607,8 +607,8 @@ static int fixup_compat_flock(struct flock *flock)
return 0;
}
-COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
- compat_ulong_t, arg)
+static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
+ compat_ulong_t arg)
{
struct fd f = fdget_raw(fd);
struct flock flock;
@@ -672,6 +672,12 @@ out_put:
return err;
}
+COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
+ compat_ulong_t, arg)
+{
+ return do_compat_fcntl64(fd, cmd, arg);
+}
+
COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
compat_ulong_t, arg)
{
@@ -684,7 +690,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
case F_OFD_SETLKW:
return -EINVAL;
}
- return compat_sys_fcntl64(fd, cmd, arg);
+ return do_compat_fcntl64(fd, cmd, arg);
}
#endif
diff --git a/fs/file.c b/fs/file.c
index 42f0db4bd0fb..7ffd6e9d103d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,6 +638,7 @@ out_unlock:
spin_unlock(&files->file_lock);
return -EBADF;
}
+EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
void do_close_on_exec(struct files_struct *files)
{
@@ -870,7 +871,7 @@ out_unlock:
return err;
}
-SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
int err = -EBADF;
struct file *file;
@@ -904,6 +905,11 @@ out_unlock:
return err;
}
+SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+{
+ return ksys_dup3(oldfd, newfd, flags);
+}
+
SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
{
if (unlikely(newfd == oldfd)) { /* corner case */
@@ -916,10 +922,10 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
rcu_read_unlock();
return retval;
}
- return sys_dup3(oldfd, newfd, 0);
+ return ksys_dup3(oldfd, newfd, 0);
}
-SYSCALL_DEFINE1(dup, unsigned int, fildes)
+int ksys_dup(unsigned int fildes)
{
int ret = -EBADF;
struct file *file = fget_raw(fildes);
@@ -934,6 +940,11 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
return ret;
}
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
+{
+ return ksys_dup(fildes);
+}
+
int f_dupfd(unsigned int from, struct file *file, unsigned flags)
{
int err;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index ff84258132bb..d705125665f0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -557,9 +557,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
* n_active reaches 0). This makes sure outstanding reads and writes
* have completed.
*/
- if (!atomic_dec_and_test(&cookie->n_active))
- wait_on_atomic_t(&cookie->n_active, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ if (!atomic_dec_and_test(&cookie->n_active)) {
+ wait_var_event(&cookie->n_active,
+ !atomic_read(&cookie->n_active));
+ }
/* Make sure any pending writes are cancelled. */
if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index ffaec2e7526c..cb8374af08a6 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -84,7 +84,7 @@ extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd);
extern int make_symlink(const char *from, const char *to);
extern int unlink_file(const char *file);
extern int do_mkdir(const char *file, int mode);
-extern int do_rmdir(const char *file);
+extern int hostfs_do_rmdir(const char *file);
extern int do_mknod(const char *file, int mode, unsigned int major,
unsigned int minor);
extern int link_file(const char *from, const char *to);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index c148e7f4f451..3cd85eb5bbb1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -706,7 +706,7 @@ static int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
if ((file = dentry_name(dentry)) == NULL)
return -ENOMEM;
- err = do_rmdir(file);
+ err = hostfs_do_rmdir(file);
__putname(file);
return err;
}
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 9c1e0f019880..5ecc4706172b 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -304,7 +304,7 @@ int do_mkdir(const char *file, int mode)
return 0;
}
-int do_rmdir(const char *file)
+int hostfs_do_rmdir(const char *file)
{
int err;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8fe1b0aa2896..b9a254dcc0e7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_ops = &hugetlb_vm_ops;
/*
- * Offset passed to mmap (before page shift) could have been
- * negative when represented as a (l)off_t.
+ * page based offset in vm_pgoff could be sufficiently large to
+ * overflow a (l)off_t when converted to byte offset.
*/
- if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
+ /* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
diff --git a/fs/internal.h b/fs/internal.h
index df262f41a0ef..980d005b21b4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,7 +55,15 @@ extern void __init chrdev_init(void);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
+long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+ unsigned int dev);
+long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
+long do_rmdir(int dfd, const char __user *pathname);
long do_unlinkat(int dfd, struct filename *name);
+long do_symlinkat(const char __user *oldname, int newdfd,
+ const char __user *newname);
+int do_linkat(int olddfd, const char __user *oldname, int newdfd,
+ const char __user *newname, int flags);
/*
* namespace.c
@@ -111,6 +119,12 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname,
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
const char *, const struct open_flags *);
+long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
+long do_faccessat(int dfd, const char __user *filename, int mode);
+int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
+int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
+ int flag);
+
extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file *filp_clone_open(struct file *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5ace7efb0d04..4823431d1c9d 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -689,7 +689,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
return error;
}
-SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
+int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int error;
struct fd f = fdget(fd);
@@ -702,3 +702,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
fdput(f);
return error;
}
+
+SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
+{
+ return ksys_ioctl(fd, cmd, arg);
+}
diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig
index f2a0cfcef11d..bcd53a79156f 100644
--- a/fs/minix/Kconfig
+++ b/fs/minix/Kconfig
@@ -18,7 +18,7 @@ config MINIX_FS
config MINIX_FS_NATIVE_ENDIAN
def_bool MINIX_FS
- depends on M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
+ depends on MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED
def_bool MINIX_FS
diff --git a/fs/namei.c b/fs/namei.c
index 921ae32dbc80..a09419379f5d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -559,9 +559,10 @@ static int __nd_alloc_stack(struct nameidata *nd)
static bool path_connected(const struct path *path)
{
struct vfsmount *mnt = path->mnt;
+ struct super_block *sb = mnt->mnt_sb;
- /* Only bind mounts can have disconnected paths */
- if (mnt->mnt_root == mnt->mnt_sb->s_root)
+ /* Bind mounts and multi-root filesystems can have disconnected paths */
+ if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
return true;
return is_subdir(path->dentry, mnt->mnt_root);
@@ -1473,43 +1474,36 @@ static struct dentry *lookup_dcache(const struct qstr *name,
}
/*
- * Call i_op->lookup on the dentry. The dentry must be negative and
- * unhashed.
- *
- * dir->d_inode->i_mutex must be held
+ * Parent directory has inode locked exclusive. This is one
+ * and only case when ->lookup() gets called on non in-lookup
+ * dentries - as the matter of fact, this only gets called
+ * when directory is guaranteed to have no in-lookup children
+ * at all.
*/
-static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
-{
- struct dentry *old;
-
- /* Don't create child dentry for a dead directory. */
- if (unlikely(IS_DEADDIR(dir))) {
- dput(dentry);
- return ERR_PTR(-ENOENT);
- }
-
- old = dir->i_op->lookup(dir, dentry, flags);
- if (unlikely(old)) {
- dput(dentry);
- dentry = old;
- }
- return dentry;
-}
-
static struct dentry *__lookup_hash(const struct qstr *name,
struct dentry *base, unsigned int flags)
{
struct dentry *dentry = lookup_dcache(name, base, flags);
+ struct dentry *old;
+ struct inode *dir = base->d_inode;
if (dentry)
return dentry;
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(dir)))
+ return ERR_PTR(-ENOENT);
+
dentry = d_alloc(base, name);
if (unlikely(!dentry))
return ERR_PTR(-ENOMEM);
- return lookup_real(base->d_inode, dentry, flags);
+ old = dir->i_op->lookup(dir, dentry, flags);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
+ }
+ return dentry;
}
static int lookup_fast(struct nameidata *nd,
@@ -3728,8 +3722,8 @@ static int may_mknod(umode_t mode)
}
}
-SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
- unsigned, dev)
+long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+ unsigned int dev)
{
struct dentry *dentry;
struct path path;
@@ -3772,9 +3766,15 @@ out:
return error;
}
+SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
+ unsigned int, dev)
+{
+ return do_mknodat(dfd, filename, mode, dev);
+}
+
SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
- return sys_mknodat(AT_FDCWD, filename, mode, dev);
+ return do_mknodat(AT_FDCWD, filename, mode, dev);
}
int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -3803,7 +3803,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
}
EXPORT_SYMBOL(vfs_mkdir);
-SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
+long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
{
struct dentry *dentry;
struct path path;
@@ -3828,9 +3828,14 @@ retry:
return error;
}
+SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
+{
+ return do_mkdirat(dfd, pathname, mode);
+}
+
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{
- return sys_mkdirat(AT_FDCWD, pathname, mode);
+ return do_mkdirat(AT_FDCWD, pathname, mode);
}
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -3872,7 +3877,7 @@ out:
}
EXPORT_SYMBOL(vfs_rmdir);
-static long do_rmdir(int dfd, const char __user *pathname)
+long do_rmdir(int dfd, const char __user *pathname)
{
int error = 0;
struct filename *name;
@@ -4108,8 +4113,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
}
EXPORT_SYMBOL(vfs_symlink);
-SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
- int, newdfd, const char __user *, newname)
+long do_symlinkat(const char __user *oldname, int newdfd,
+ const char __user *newname)
{
int error;
struct filename *from;
@@ -4139,9 +4144,15 @@ out_putname:
return error;
}
+SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
+ int, newdfd, const char __user *, newname)
+{
+ return do_symlinkat(oldname, newdfd, newname);
+}
+
SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
{
- return sys_symlinkat(oldname, AT_FDCWD, newname);
+ return do_symlinkat(oldname, AT_FDCWD, newname);
}
/**
@@ -4233,8 +4244,8 @@ EXPORT_SYMBOL(vfs_link);
* with linux 2.0, and to avoid hard-linking to directories
* and other special files. --ADM
*/
-SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
- int, newdfd, const char __user *, newname, int, flags)
+int do_linkat(int olddfd, const char __user *oldname, int newdfd,
+ const char __user *newname, int flags)
{
struct dentry *new_dentry;
struct path old_path, new_path;
@@ -4298,9 +4309,15 @@ out:
return error;
}
+SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
+ int, newdfd, const char __user *, newname, int, flags)
+{
+ return do_linkat(olddfd, oldname, newdfd, newname, flags);
+}
+
SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
{
- return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
/**
@@ -4478,8 +4495,8 @@ out:
}
EXPORT_SYMBOL(vfs_rename);
-SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
- int, newdfd, const char __user *, newname, unsigned int, flags)
+static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
+ const char __user *newname, unsigned int flags)
{
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
@@ -4621,15 +4638,21 @@ exit:
return error;
}
+SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
+ int, newdfd, const char __user *, newname, unsigned int, flags)
+{
+ return do_renameat2(olddfd, oldname, newdfd, newname, flags);
+}
+
SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname)
{
- return sys_renameat2(olddfd, oldname, newdfd, newname, 0);
+ return do_renameat2(olddfd, oldname, newdfd, newname, 0);
}
SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
{
- return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
int vfs_whiteout(struct inode *dir, struct dentry *dentry)
diff --git a/fs/namespace.c b/fs/namespace.c
index 9d1374ab6e06..e398f32d7541 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1680,7 +1680,7 @@ static inline bool may_mandlock(void)
* unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
*/
-SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
+int ksys_umount(char __user *name, int flags)
{
struct path path;
struct mount *mnt;
@@ -1720,6 +1720,11 @@ out:
return retval;
}
+SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
+{
+ return ksys_umount(name, flags);
+}
+
#ifdef __ARCH_WANT_SYS_OLDUMOUNT
/*
@@ -1727,7 +1732,7 @@ out:
*/
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
- return sys_umount(name, 0);
+ return ksys_umount(name, 0);
}
#endif
@@ -3032,8 +3037,8 @@ struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
}
EXPORT_SYMBOL(mount_subtree);
-SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
- char __user *, type, unsigned long, flags, void __user *, data)
+int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
+ unsigned long flags, void __user *data)
{
int ret;
char *kernel_type;
@@ -3066,6 +3071,12 @@ out_type:
return ret;
}
+SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
+ char __user *, type, unsigned long, flags, void __user *, data)
+{
+ return ksys_mount(dev_name, dir_name, type, flags, data);
+}
+
/*
* Return true if path is reachable from root
*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8c10b0562e75..621c517b325c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -86,10 +86,10 @@ struct nfs_direct_req {
struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
int mirror_count;
+ loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
bytes_left, /* bytes left to be sent */
- io_start, /* start of IO */
error; /* any reported error */
struct completion completion; /* wait for i/o completion */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d893543cf3b..d17a90c4fa37 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -85,11 +85,6 @@ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
-int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode)
-{
- return nfs_wait_killable(mode);
-}
-
/**
* nfs_compat_user_ino64 - returns the user-visible inode number
* @fileid: 64-bit fileid
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18a7626ac638..67d19cd92e44 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -98,8 +98,8 @@ nfs_page_free(struct nfs_page *p)
int
nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
{
- return wait_on_atomic_t(&l_ctx->io_count, nfs_wait_atomic_killable,
- TASK_KILLABLE);
+ return wait_var_event_killable(&l_ctx->io_count,
+ !atomic_read(&l_ctx->io_count));
}
/**
@@ -395,7 +395,7 @@ static void nfs_clear_request(struct nfs_page *req)
}
if (l_ctx != NULL) {
if (atomic_dec_and_test(&l_ctx->io_count)) {
- wake_up_atomic_t(&l_ctx->io_count);
+ wake_up_var(&l_ctx->io_count);
if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c13e826614b5..ee723aa153a3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
- struct inode *inode = lo->plh_inode;
+ struct inode *inode;
+ if (!lo)
+ return;
+ inode = lo->plh_inode;
pnfs_layoutreturn_before_put_layout_hdr(lo);
if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
@@ -1241,10 +1244,12 @@ retry:
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (!lo || !pnfs_layout_is_valid(lo) ||
- test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ lo = NULL;
goto out_noroc;
+ }
+ pnfs_get_layout_hdr(lo);
if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
- pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
TASK_UNINTERRUPTIBLE);
@@ -1312,10 +1317,12 @@ out_noroc:
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(args);
+ pnfs_put_layout_hdr(lo);
return true;
}
if (layoutreturn)
pnfs_send_layoutreturn(lo, &stateid, iomode, true);
+ pnfs_put_layout_hdr(lo);
return false;
}
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 03aaa60c7768..32ba2d471853 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -245,7 +245,7 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
{
if (list_empty(pages)) {
if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
- wake_up_atomic_t(&cinfo->mds->rpcs_out);
+ wake_up_var(&cinfo->mds->rpcs_out);
/* don't call nfs_commitdata_release - it tries to put
* the open_context which is not acquired until nfs_init_commit
* which has not been called on @data */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 29bacdc56f6a..5e470e233c83 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2631,6 +2631,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
/* initial superblock/root creation */
mount_info->fill_super(s, mount_info);
nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
+ if (!(server->flags & NFS_MOUNT_UNSHARED))
+ s->s_iflags |= SB_I_MULTIROOT;
}
mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7428a669d7a7..6579f3b367bd 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1620,8 +1620,8 @@ static void nfs_writeback_result(struct rpc_task *task,
static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
{
- return wait_on_atomic_t(&cinfo->rpcs_out,
- nfs_wait_atomic_killable, TASK_KILLABLE);
+ return wait_var_event_killable(&cinfo->rpcs_out,
+ !atomic_read(&cinfo->rpcs_out));
}
static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
@@ -1632,7 +1632,7 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
{
if (atomic_dec_and_test(&cinfo->rpcs_out))
- wake_up_atomic_t(&cinfo->rpcs_out);
+ wake_up_var(&cinfo->rpcs_out);
}
void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
return status;
}
-int nfs_commit_inode(struct inode *inode, int how)
+static int __nfs_commit_inode(struct inode *inode, int how,
+ struct writeback_control *wbc)
{
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
- int error = 0;
- int res;
+ int ret, nscan;
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
- res = nfs_scan_commit(inode, &head, &cinfo);
- if (res)
- error = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ for (;;) {
+ ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
+ if (ret <= 0)
+ break;
+ ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ if (ret < 0)
+ break;
+ ret = 0;
+ if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
+ if (nscan < wbc->nr_to_write)
+ wbc->nr_to_write -= nscan;
+ else
+ wbc->nr_to_write = 0;
+ }
+ if (nscan < INT_MAX)
+ break;
+ cond_resched();
+ }
nfs_commit_end(cinfo.mds);
- if (res == 0)
- return res;
- if (error < 0)
- goto out_error;
- if (!may_wait)
- goto out_mark_dirty;
- error = wait_on_commit(cinfo.mds);
- if (error < 0)
- return error;
- return res;
-out_error:
- res = error;
- /* Note: If we exit without ensuring that the commit is complete,
- * we must mark the inode as dirty. Otherwise, future calls to
- * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
- * that the data is on the disk.
- */
-out_mark_dirty:
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return res;
+ if (ret || !may_wait)
+ return ret;
+ return wait_on_commit(cinfo.mds);
+}
+
+int nfs_commit_inode(struct inode *inode, int how)
+{
+ return __nfs_commit_inode(inode, how, NULL);
}
EXPORT_SYMBOL_GPL(nfs_commit_inode);
@@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
int flags = FLUSH_SYNC;
int ret = 0;
- /* no commits means nothing needs to be done */
- if (!atomic_long_read(&nfsi->commit_info.ncommit))
- return ret;
-
if (wbc->sync_mode == WB_SYNC_NONE) {
+ /* no commits means nothing needs to be done */
+ if (!atomic_long_read(&nfsi->commit_info.ncommit))
+ goto check_requests_outstanding;
+
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
@@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
flags = 0;
}
- ret = nfs_commit_inode(inode, flags);
- if (ret >= 0) {
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (ret < wbc->nr_to_write)
- wbc->nr_to_write -= ret;
- else
- wbc->nr_to_write = 0;
- }
- return 0;
- }
+ ret = __nfs_commit_inode(inode, flags, wbc);
+ if (!ret) {
+ if (flags & FLUSH_SYNC)
+ return 0;
+ } else if (atomic_long_read(&nfsi->commit_info.ncommit))
+ goto out_mark_dirty;
+
+check_requests_outstanding:
+ if (!atomic_read(&nfsi->commit_info.rpcs_out))
+ return ret;
out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 150521c9671b..61b770e39809 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -268,6 +268,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl)
kfree(nbl);
}
+static void
+remove_blocked_locks(struct nfs4_lockowner *lo)
+{
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct nfsd4_blocked_lock *nbl;
+ LIST_HEAD(reaplist);
+
+ /* Dequeue all blocked locks */
+ spin_lock(&nn->blocked_locks_lock);
+ while (!list_empty(&lo->lo_blocked)) {
+ nbl = list_first_entry(&lo->lo_blocked,
+ struct nfsd4_blocked_lock,
+ nbl_list);
+ list_del_init(&nbl->nbl_list);
+ list_move(&nbl->nbl_lru, &reaplist);
+ }
+ spin_unlock(&nn->blocked_locks_lock);
+
+ /* Now free them */
+ while (!list_empty(&reaplist)) {
+ nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
+ nbl_lru);
+ list_del_init(&nbl->nbl_lru);
+ posix_unblock_lock(&nbl->nbl_lock);
+ free_blocked_lock(nbl);
+ }
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -1866,6 +1895,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
static void
__destroy_client(struct nfs4_client *clp)
{
+ int i;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
struct list_head reaplist;
@@ -1895,6 +1925,16 @@ __destroy_client(struct nfs4_client *clp)
nfs4_get_stateowner(&oo->oo_owner);
release_openowner(oo);
}
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ struct nfs4_stateowner *so, *tmp;
+
+ list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
+ so_strhash) {
+ /* Should be no openowners at this point */
+ WARN_ON_ONCE(so->so_is_open_owner);
+ remove_blocked_locks(lockowner(so));
+ }
+ }
nfsd4_return_all_client_layouts(clp);
nfsd4_shutdown_callback(clp);
if (clp->cl_cb_conn.cb_xprt)
@@ -6355,6 +6395,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
}
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
+ remove_blocked_locks(lo);
nfs4_put_stateowner(&lo->lo_owner);
return status;
@@ -7140,6 +7181,8 @@ nfs4_state_destroy_net(struct net *net)
}
}
+ WARN_ON(!list_empty(&nn->blocked_locks_lru));
+
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&nn->unconf_id_hashtbl[i])) {
clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -7206,7 +7249,6 @@ nfs4_state_shutdown_net(struct net *net)
struct nfs4_delegation *dp = NULL;
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct nfsd4_blocked_lock *nbl;
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -7227,24 +7269,6 @@ nfs4_state_shutdown_net(struct net *net)
nfs4_put_stid(&dp->dl_stid);
}
- BUG_ON(!list_empty(&reaplist));
- spin_lock(&nn->blocked_locks_lock);
- while (!list_empty(&nn->blocked_locks_lru)) {
- nbl = list_first_entry(&nn->blocked_locks_lru,
- struct nfsd4_blocked_lock, nbl_lru);
- list_move(&nbl->nbl_lru, &reaplist);
- list_del_init(&nbl->nbl_list);
- }
- spin_unlock(&nn->blocked_locks_lock);
-
- while (!list_empty(&reaplist)) {
- nbl = list_first_entry(&reaplist,
- struct nfsd4_blocked_lock, nbl_lru);
- list_del_init(&nbl->nbl_lru);
- posix_unblock_lock(&nbl->nbl_lock);
- free_blocked_lock(nbl);
- }
-
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index c07eb3d655ea..fa803a58a605 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -820,9 +820,8 @@ out_destroy_group:
return fd;
}
-SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
- __u64, mask, int, dfd,
- const char __user *, pathname)
+static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ int dfd, const char __user *pathname)
{
struct inode *inode = NULL;
struct vfsmount *mnt = NULL;
@@ -928,13 +927,20 @@ fput_and_out:
return ret;
}
+SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
+ __u64, mask, int, dfd,
+ const char __user *, pathname)
+{
+ return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
int, fanotify_fd, unsigned int, flags,
__u32, mask0, __u32, mask1, int, dfd,
const char __user *, pathname)
{
- return sys_fanotify_mark(fanotify_fd, flags,
+ return do_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
((__u64)mask0 << 32) | mask1,
#else
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 2c908b31d6c9..43c23653ce2e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -635,7 +635,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
/* inotify syscalls */
-SYSCALL_DEFINE1(inotify_init1, int, flags)
+static int do_inotify_init(int flags)
{
struct fsnotify_group *group;
int ret;
@@ -660,9 +660,14 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
return ret;
}
+SYSCALL_DEFINE1(inotify_init1, int, flags)
+{
+ return do_inotify_init(flags);
+}
+
SYSCALL_DEFINE0(inotify_init)
{
- return sys_inotify_init1(0);
+ return do_inotify_init(0);
}
SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 36b0772701a0..60702d677bd4 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns,
return fd;
}
+EXPORT_SYMBOL_GPL(open_related_ns);
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eac5140aac47..e5076185cc1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void)
static int o2net_accept_one(struct socket *sock, int *more)
{
- int ret, slen;
+ int ret;
struct sockaddr_in sin;
struct socket *new_sock = NULL;
struct o2nm_node *node = NULL;
@@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
- slen = sizeof(sin);
- ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
- &slen, 1);
+ ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
goto out;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index e87279e49ba3..6b92cb241138 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -134,9 +134,10 @@ ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
{
struct ocfs2_filecheck_entry *p;
- if (!atomic_dec_and_test(&entry->fs_count))
- wait_on_atomic_t(&entry->fs_count, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ if (!atomic_dec_and_test(&entry->fs_count)) {
+ wait_var_event(&entry->fs_count,
+ !atomic_read(&entry->fs_count));
+ }
spin_lock(&entry->fs_fcheck->fc_lock);
while (!list_empty(&entry->fs_fcheck->fc_head)) {
@@ -183,7 +184,7 @@ static void
ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
{
if (atomic_dec_and_test(&entry->fs_count))
- wake_up_atomic_t(&entry->fs_count);
+ wake_up_var(&entry->fs_count);
}
static struct ocfs2_filecheck_sysfs_entry *
diff --git a/fs/open.c b/fs/open.c
index 7ea118471dce..d0e955b558ad 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -128,7 +128,7 @@ out:
}
EXPORT_SYMBOL_GPL(vfs_truncate);
-static long do_sys_truncate(const char __user *pathname, loff_t length)
+long do_sys_truncate(const char __user *pathname, loff_t length)
{
unsigned int lookup_flags = LOOKUP_FOLLOW;
struct path path;
@@ -162,7 +162,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length
}
#endif
-static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
struct inode *inode;
struct dentry *dentry;
@@ -333,7 +333,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
}
EXPORT_SYMBOL_GPL(vfs_fallocate);
-SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
+int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
{
struct fd f = fdget(fd);
int error = -EBADF;
@@ -345,12 +345,17 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
return error;
}
+SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
+{
+ return ksys_fallocate(fd, mode, offset, len);
+}
+
/*
* access() needs to use the real uid/gid, not the effective uid/gid.
* We do this by temporarily clearing all FS-related capabilities and
* switching the fsuid/fsgid around to the real ones.
*/
-SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
+long do_faccessat(int dfd, const char __user *filename, int mode)
{
const struct cred *old_cred;
struct cred *override_cred;
@@ -426,12 +431,17 @@ out:
return res;
}
+SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
+{
+ return do_faccessat(dfd, filename, mode);
+}
+
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
- return sys_faccessat(AT_FDCWD, filename, mode);
+ return do_faccessat(AT_FDCWD, filename, mode);
}
-SYSCALL_DEFINE1(chdir, const char __user *, filename)
+int ksys_chdir(const char __user *filename)
{
struct path path;
int error;
@@ -457,6 +467,11 @@ out:
return error;
}
+SYSCALL_DEFINE1(chdir, const char __user *, filename)
+{
+ return ksys_chdir(filename);
+}
+
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
struct fd f = fdget_raw(fd);
@@ -479,7 +494,7 @@ out:
return error;
}
-SYSCALL_DEFINE1(chroot, const char __user *, filename)
+int ksys_chroot(const char __user *filename)
{
struct path path;
int error;
@@ -512,6 +527,11 @@ out:
return error;
}
+SYSCALL_DEFINE1(chroot, const char __user *, filename)
+{
+ return ksys_chroot(filename);
+}
+
static int chmod_common(const struct path *path, umode_t mode)
{
struct inode *inode = path->dentry->d_inode;
@@ -541,7 +561,7 @@ out_unlock:
return error;
}
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
+int ksys_fchmod(unsigned int fd, umode_t mode)
{
struct fd f = fdget(fd);
int err = -EBADF;
@@ -554,7 +574,12 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
return err;
}
-SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
+{
+ return ksys_fchmod(fd, mode);
+}
+
+int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
{
struct path path;
int error;
@@ -572,9 +597,15 @@ retry:
return error;
}
+SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
+ umode_t, mode)
+{
+ return do_fchmodat(dfd, filename, mode);
+}
+
SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
{
- return sys_fchmodat(AT_FDCWD, filename, mode);
+ return do_fchmodat(AT_FDCWD, filename, mode);
}
static int chown_common(const struct path *path, uid_t user, gid_t group)
@@ -619,8 +650,8 @@ retry_deleg:
return error;
}
-SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
- gid_t, group, int, flag)
+int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
+ int flag)
{
struct path path;
int error = -EINVAL;
@@ -651,18 +682,24 @@ out:
return error;
}
+SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
+ gid_t, group, int, flag)
+{
+ return do_fchownat(dfd, filename, user, group, flag);
+}
+
SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
{
- return sys_fchownat(AT_FDCWD, filename, user, group, 0);
+ return do_fchownat(AT_FDCWD, filename, user, group, 0);
}
SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
{
- return sys_fchownat(AT_FDCWD, filename, user, group,
- AT_SYMLINK_NOFOLLOW);
+ return do_fchownat(AT_FDCWD, filename, user, group,
+ AT_SYMLINK_NOFOLLOW);
}
-SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
+int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
{
struct fd f = fdget(fd);
int error = -EBADF;
@@ -682,6 +719,11 @@ out:
return error;
}
+SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
+{
+ return ksys_fchown(fd, user, group);
+}
+
int open_check_o_direct(struct file *f)
{
/* NB: we're sure to have correct a_ops only after f_op->open */
@@ -1114,7 +1156,7 @@ COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, fla
*/
SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
{
- return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
+ return ksys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
}
#endif
@@ -1163,7 +1205,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
return retval;
}
-EXPORT_SYMBOL(sys_close);
/*
* This routine simulates a hangup on the tty, to arrange that users
diff --git a/fs/pipe.c b/fs/pipe.c
index 7b1954caf388..39d6f431da83 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -841,7 +841,7 @@ int do_pipe_flags(int *fd, int flags)
* sys_pipe() is the normal C calling standard for creating
* a pipe. It's not the way Unix traditionally does this, though.
*/
-SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
+static int do_pipe2(int __user *fildes, int flags)
{
struct file *files[2];
int fd[2];
@@ -863,9 +863,14 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
return error;
}
+SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
+{
+ return do_pipe2(fildes, flags);
+}
+
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
- return sys_pipe2(fildes, 0);
+ return do_pipe2(fildes, 0);
}
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9298324325ed..d53246863cfb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,9 +94,6 @@
#include <linux/sched/stat.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
-#ifdef CONFIG_HARDWALL
-#include <asm/hardwall.h>
-#endif
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"
@@ -3002,9 +2999,6 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_TASK_IO_ACCOUNTING
ONE("io", S_IRUSR, proc_tgid_io_accounting),
#endif
-#ifdef CONFIG_HARDWALL
- ONE("hardwall", S_IRUGO, proc_pid_hardwall),
-#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
@@ -3393,9 +3387,6 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_TASK_IO_ACCOUNTING
ONE("io", S_IRUSR, proc_tid_io_accounting),
#endif
-#ifdef CONFIG_HARDWALL
- ONE("hardwall", S_IRUGO, proc_pid_hardwall),
-#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
diff --git a/fs/quota/compat.c b/fs/quota/compat.c
index 779caed4f078..c30572857619 100644
--- a/fs/quota/compat.c
+++ b/fs/quota/compat.c
@@ -41,8 +41,9 @@ struct compat_fs_quota_stat {
__u16 qs_iwarnlimit;
};
-asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
- qid_t id, void __user *addr)
+COMPAT_SYSCALL_DEFINE4(quotactl32, unsigned int, cmd,
+ const char __user *, special, qid_t, id,
+ void __user *, addr)
{
unsigned int cmds;
struct if_dqblk __user *dqblk;
@@ -59,7 +60,7 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
case Q_GETQUOTA:
dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
compat_dqblk = addr;
- ret = sys_quotactl(cmd, special, id, dqblk);
+ ret = kernel_quotactl(cmd, special, id, dqblk);
if (ret)
break;
if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
@@ -75,12 +76,12 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
get_user(data, &compat_dqblk->dqb_valid) ||
put_user(data, &dqblk->dqb_valid))
break;
- ret = sys_quotactl(cmd, special, id, dqblk);
+ ret = kernel_quotactl(cmd, special, id, dqblk);
break;
case Q_XGETQSTAT:
fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
compat_fsqstat = addr;
- ret = sys_quotactl(cmd, special, id, fsqstat);
+ ret = kernel_quotactl(cmd, special, id, fsqstat);
if (ret)
break;
ret = -EFAULT;
@@ -113,7 +114,7 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
ret = 0;
break;
default:
- ret = sys_quotactl(cmd, special, id, addr);
+ ret = kernel_quotactl(cmd, special, id, addr);
}
return ret;
}
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 43612e2a73af..860bfbe7a07a 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -833,8 +833,8 @@ static struct super_block *quotactl_block(const char __user *special, int cmd)
* calls. Maybe we need to add the process quotas etc. in the future,
* but we probably should use rlimits for that.
*/
-SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
- qid_t, id, void __user *, addr)
+int kernel_quotactl(unsigned int cmd, const char __user *special,
+ qid_t id, void __user *addr)
{
uint cmds, type;
struct super_block *sb = NULL;
@@ -885,3 +885,9 @@ out:
path_put(pathp);
return ret;
}
+
+SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
+ qid_t, id, void __user *, addr)
+{
+ return kernel_quotactl(cmd, special, id, addr);
+}
diff --git a/fs/read_write.c b/fs/read_write.c
index f8547b82dfb3..c4eabbfc90df 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -301,7 +301,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
}
EXPORT_SYMBOL(vfs_llseek);
-SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
+off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
{
off_t retval;
struct fd f = fdget_pos(fd);
@@ -319,10 +319,15 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
return retval;
}
+SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
+{
+ return ksys_lseek(fd, offset, whence);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
- return sys_lseek(fd, offset, whence);
+ return ksys_lseek(fd, offset, whence);
}
#endif
@@ -563,7 +568,7 @@ static inline void file_pos_write(struct file *file, loff_t pos)
file->f_pos = pos;
}
-SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
+ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
@@ -578,8 +583,12 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
return ret;
}
-SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
- size_t, count)
+SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
+{
+ return ksys_read(fd, buf, count);
+}
+
+ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
@@ -595,8 +604,14 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
return ret;
}
-SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
- size_t, count, loff_t, pos)
+SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
+ size_t, count)
+{
+ return ksys_write(fd, buf, count);
+}
+
+ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
+ loff_t pos)
{
struct fd f;
ssize_t ret = -EBADF;
@@ -615,8 +630,14 @@ SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
return ret;
}
-SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
- size_t, count, loff_t, pos)
+SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
+ size_t, count, loff_t, pos)
+{
+ return ksys_pread64(fd, buf, count, pos);
+}
+
+ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
+ size_t count, loff_t pos)
{
struct fd f;
ssize_t ret = -EBADF;
@@ -635,6 +656,12 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
return ret;
}
+SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
+ size_t, count, loff_t, pos)
+{
+ return ksys_pwrite64(fd, buf, count, pos);
+}
+
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
loff_t *ppos, int type, rwf_t flags)
{
diff --git a/fs/readdir.c b/fs/readdir.c
index 1b83b0ad183b..d97f548e6323 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -292,8 +292,8 @@ efault:
return -EFAULT;
}
-SYSCALL_DEFINE3(getdents64, unsigned int, fd,
- struct linux_dirent64 __user *, dirent, unsigned int, count)
+int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
+ unsigned int count)
{
struct fd f;
struct linux_dirent64 __user * lastdirent;
@@ -326,6 +326,13 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
return error;
}
+
+SYSCALL_DEFINE3(getdents64, unsigned int, fd,
+ struct linux_dirent64 __user *, dirent, unsigned int, count)
+{
+ return ksys_getdents64(fd, dirent, count);
+}
+
#ifdef CONFIG_COMPAT
struct compat_old_linux_dirent {
compat_ulong_t d_ino;
diff --git a/fs/select.c b/fs/select.c
index b6c36254028a..ba879c51288f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -675,8 +675,8 @@ out_nofds:
return ret;
}
-SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
- fd_set __user *, exp, struct timeval __user *, tvp)
+static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
+ fd_set __user *exp, struct timeval __user *tvp)
{
struct timespec64 end_time, *to = NULL;
struct timeval tv;
@@ -699,6 +699,12 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
return ret;
}
+SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
+ fd_set __user *, exp, struct timeval __user *, tvp)
+{
+ return kern_select(n, inp, outp, exp, tvp);
+}
+
static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
fd_set __user *exp, struct timespec __user *tsp,
const sigset_t __user *sigmask, size_t sigsetsize)
@@ -784,7 +790,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
if (copy_from_user(&a, arg, sizeof(a)))
return -EFAULT;
- return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
+ return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
}
#endif
@@ -1259,9 +1265,9 @@ out_nofds:
return ret;
}
-COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
- compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
- struct compat_timeval __user *, tvp)
+static int do_compat_select(int n, compat_ulong_t __user *inp,
+ compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+ struct compat_timeval __user *tvp)
{
struct timespec64 end_time, *to = NULL;
struct compat_timeval tv;
@@ -1284,6 +1290,13 @@ COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
return ret;
}
+COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
+ compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
+ struct compat_timeval __user *, tvp)
+{
+ return do_compat_select(n, inp, outp, exp, tvp);
+}
+
struct compat_sel_arg_struct {
compat_ulong_t n;
compat_uptr_t inp;
@@ -1298,8 +1311,8 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
if (copy_from_user(&a, arg, sizeof(a)))
return -EFAULT;
- return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
- compat_ptr(a.exp), compat_ptr(a.tvp));
+ return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
+ compat_ptr(a.exp), compat_ptr(a.tvp));
}
static long do_compat_pselect(int n, compat_ulong_t __user *inp,
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 76bf9cc62074..d2187a813376 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -256,8 +256,8 @@ static const struct file_operations signalfd_fops = {
.llseek = noop_llseek,
};
-SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
- size_t, sizemask, int, flags)
+static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask,
+ int flags)
{
sigset_t sigmask;
struct signalfd_ctx *ctx;
@@ -310,17 +310,22 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
return ufd;
}
+SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
+ size_t, sizemask, int, flags)
+{
+ return do_signalfd4(ufd, user_mask, sizemask, flags);
+}
+
SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask,
size_t, sizemask)
{
- return sys_signalfd4(ufd, user_mask, sizemask, 0);
+ return do_signalfd4(ufd, user_mask, sizemask, 0);
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
- const compat_sigset_t __user *,sigmask,
- compat_size_t, sigsetsize,
- int, flags)
+static long do_compat_signalfd4(int ufd,
+ const compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize, int flags)
{
sigset_t tmp;
sigset_t __user *ksigmask;
@@ -333,13 +338,21 @@ COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
return -EFAULT;
- return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
+ return do_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
+}
+
+COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
+ const compat_sigset_t __user *, sigmask,
+ compat_size_t, sigsetsize,
+ int, flags)
+{
+ return do_compat_signalfd4(ufd, sigmask, sigsetsize, flags);
}
COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd,
const compat_sigset_t __user *,sigmask,
compat_size_t, sigsetsize)
{
- return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
+ return do_compat_signalfd4(ufd, sigmask, sigsetsize, 0);
}
#endif
diff --git a/fs/splice.c b/fs/splice.c
index 39e2dc01ac12..005d09cf3fa8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1331,8 +1331,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
* Currently we punt and implement it as a normal copy, see pipe_to_user().
*
*/
-SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
- unsigned long, nr_segs, unsigned int, flags)
+static long do_vmsplice(int fd, const struct iovec __user *iov,
+ unsigned long nr_segs, unsigned int flags)
{
struct fd f;
long error;
@@ -1358,6 +1358,12 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
return error;
}
+SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
+ unsigned long, nr_segs, unsigned int, flags)
+{
+ return do_vmsplice(fd, iov, nr_segs, flags);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32,
unsigned int, nr_segs, unsigned int, flags)
@@ -1375,7 +1381,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
put_user(v.iov_len, &iov[i].iov_len))
return -EFAULT;
}
- return sys_vmsplice(fd, iov, nr_segs, flags);
+ return do_vmsplice(fd, iov, nr_segs, flags);
}
#endif
diff --git a/fs/stat.c b/fs/stat.c
index 873785dae022..f8e6fb2c3657 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -379,8 +379,8 @@ SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
return error;
}
-SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
- char __user *, buf, int, bufsiz)
+static int do_readlinkat(int dfd, const char __user *pathname,
+ char __user *buf, int bufsiz)
{
struct path path;
int error;
@@ -415,10 +415,16 @@ retry:
return error;
}
+SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
+ char __user *, buf, int, bufsiz)
+{
+ return do_readlinkat(dfd, pathname, buf, bufsiz);
+}
+
SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf,
int, bufsiz)
{
- return sys_readlinkat(AT_FDCWD, path, buf, bufsiz);
+ return do_readlinkat(AT_FDCWD, path, buf, bufsiz);
}
diff --git a/fs/sync.c b/fs/sync.c
index 09f0259724ff..b54e0541ad89 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -105,7 +105,7 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
* just write metadata (such as inodes or bitmaps) to block device page cache
* and do not sync it on their own in ->sync_fs().
*/
-SYSCALL_DEFINE0(sync)
+void ksys_sync(void)
{
int nowait = 0, wait = 1;
@@ -117,6 +117,11 @@ SYSCALL_DEFINE0(sync)
iterate_bdevs(fdatawait_one_bdev, NULL);
if (unlikely(laptop_mode))
laptop_sync_completion();
+}
+
+SYSCALL_DEFINE0(sync)
+{
+ ksys_sync();
return 0;
}
@@ -276,8 +281,8 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
* already-instantiated disk blocks, there are no guarantees here that the data
* will be available after a crash.
*/
-SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
- unsigned int, flags)
+int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
+ unsigned int flags)
{
int ret;
struct fd f;
@@ -355,10 +360,16 @@ out:
return ret;
}
+SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
+ unsigned int, flags)
+{
+ return ksys_sync_file_range(fd, offset, nbytes, flags);
+}
+
/* It would be nice if people remember that not all the world's an i386
when they introduce new system calls */
SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags,
loff_t, offset, loff_t, nbytes)
{
- return sys_sync_file_range(fd, offset, nbytes, flags);
+ return ksys_sync_file_range(fd, offset, nbytes, flags);
}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 8664db25a9a6..215c225b2ca1 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -106,6 +106,7 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
{
return sysfs_do_create_link(kobj, target, name, 0);
}
+EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn);
/**
* sysfs_delete_link - remove symlink in object's directory.
diff --git a/fs/utimes.c b/fs/utimes.c
index e4b3d7c2c9f5..69d4b6ba1bfb 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -184,8 +184,8 @@ SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags);
}
-SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
- struct timeval __user *, utimes)
+static long do_futimesat(int dfd, const char __user *filename,
+ struct timeval __user *utimes)
{
struct timeval times[2];
struct timespec64 tstimes[2];
@@ -212,10 +212,17 @@ SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
return do_utimes(dfd, filename, utimes ? tstimes : NULL, 0);
}
+
+SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
+ struct timeval __user *, utimes)
+{
+ return do_futimesat(dfd, filename, utimes);
+}
+
SYSCALL_DEFINE2(utimes, char __user *, filename,
struct timeval __user *, utimes)
{
- return sys_futimesat(AT_FDCWD, filename, utimes);
+ return do_futimesat(AT_FDCWD, filename, utimes);
}
#ifdef CONFIG_COMPAT
@@ -253,7 +260,8 @@ COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filena
return do_utimes(dfd, filename, t ? tv : NULL, flags);
}
-COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t)
+static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
+ struct compat_timeval __user *t)
{
struct timespec64 tv[2];
@@ -272,8 +280,15 @@ COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filena
return do_utimes(dfd, filename, t ? tv : NULL, 0);
}
+COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd,
+ const char __user *, filename,
+ struct compat_timeval __user *, t)
+{
+ return do_compat_futimesat(dfd, filename, t);
+}
+
COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t)
{
- return compat_sys_futimesat(AT_FDCWD, filename, t);
+ return do_compat_futimesat(AT_FDCWD, filename, t);
}
#endif