diff options
Diffstat (limited to 'fs')
91 files changed, 3624 insertions, 2262 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 967db336d11a..9eaff55df7b4 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -251,7 +251,7 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry _enter("%s", cell->name); ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", - &result, _expiry); + &result, _expiry, true); if (ret < 0) { _leave(" = %d [dns]", ret); return ERR_PTR(ret); diff --git a/fs/afs/afs.h b/fs/afs/afs.h index d12ffb457e47..3f4e460c6655 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -23,6 +23,9 @@ #define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ #define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ +#define AFS_VL_MAX_LIFESPAN (120 * HZ) +#define AFS_PROBE_MAX_LIFESPAN (30 * HZ) + typedef u64 afs_volid_t; typedef u64 afs_vnodeid_t; typedef u64 afs_dataversion_t; @@ -69,8 +72,8 @@ typedef enum { struct afs_callback { time64_t expires_at; /* Time at which expires */ - unsigned version; /* Callback version */ - afs_callback_type_t type; /* Type of callback */ + //unsigned version; /* Callback version */ + //afs_callback_type_t type; /* Type of callback */ }; struct afs_callback_break { @@ -144,6 +147,15 @@ struct afs_file_status { u32 abort_code; /* Abort if bulk-fetching this failed */ }; +struct afs_status_cb { + struct afs_file_status status; + struct afs_callback callback; + unsigned int cb_break; /* Pre-op callback break counter */ + bool have_status; /* True if status record was retrieved */ + bool have_cb; /* True if cb record was retrieved */ + bool have_error; /* True if status.abort_code indicates an error */ +}; + /* * AFS file status change request */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 128f2dbe256a..d441bef72163 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -94,15 +94,15 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode, struct afs_server *server = entry->server; again: - if (vnode->cb_interest && - likely(vnode->cb_interest == entry->cb_interest)) + vcbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock)); + if (vcbi && likely(vcbi == entry->cb_interest)) return 0; read_lock(&slist->lock); cbi = afs_get_cb_interest(entry->cb_interest); read_unlock(&slist->lock); - vcbi = vnode->cb_interest; if (vcbi) { if (vcbi == cbi) { afs_put_cb_interest(afs_v2net(vnode), cbi); @@ -114,8 +114,9 @@ again: */ if (cbi && vcbi->server == cbi->server) { write_seqlock(&vnode->cb_lock); - old = vnode->cb_interest; - vnode->cb_interest = cbi; + old = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + rcu_assign_pointer(vnode->cb_interest, cbi); write_sequnlock(&vnode->cb_lock); afs_put_cb_interest(afs_v2net(vnode), old); return 0; @@ -160,8 +161,9 @@ again: */ write_seqlock(&vnode->cb_lock); - old = vnode->cb_interest; - vnode->cb_interest = cbi; + old = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + rcu_assign_pointer(vnode->cb_interest, cbi); vnode->cb_s_break = cbi->server->cb_s_break; vnode->cb_v_break = vnode->volume->cb_v_break; clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); @@ -191,10 +193,11 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) vi = NULL; write_unlock(&cbi->server->cb_break_lock); - kfree(vi); + if (vi) + kfree_rcu(vi, rcu); afs_put_server(net, cbi->server); } - kfree(cbi); + kfree_rcu(cbi, rcu); } } @@ -218,14 +221,8 @@ void __afs_break_callback(struct afs_vnode *vnode) vnode->cb_break++; afs_clear_permits(vnode); - spin_lock(&vnode->lock); - - _debug("break callback"); - - if (list_empty(&vnode->granted_locks) && - !list_empty(&vnode->pending_locks)) + if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) afs_lock_may_be_available(vnode); - spin_unlock(&vnode->lock); } } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 9de46116c749..9c3b07ba2222 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -123,6 +123,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, const char *name, unsigned int namelen, const char *addresses) { + struct afs_vlserver_list *vllist; struct afs_cell *cell; int i, ret; @@ -151,18 +152,14 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, atomic_set(&cell->usage, 2); INIT_WORK(&cell->manager, afs_manage_cell); - cell->flags = ((1 << AFS_CELL_FL_NOT_READY) | - (1 << AFS_CELL_FL_NO_LOOKUP_YET)); INIT_LIST_HEAD(&cell->proc_volumes); rwlock_init(&cell->proc_lock); rwlock_init(&cell->vl_servers_lock); - /* Fill in the VL server list if we were given a list of addresses to - * use. + /* Provide a VL server list, filling it in if we were given a list of + * addresses to use. */ if (addresses) { - struct afs_vlserver_list *vllist; - vllist = afs_parse_text_addrs(net, addresses, strlen(addresses), ':', VL_SERVICE, AFS_VL_PORT); @@ -171,19 +168,32 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, goto parse_failed; } - rcu_assign_pointer(cell->vl_servers, vllist); + vllist->source = DNS_RECORD_FROM_CONFIG; + vllist->status = DNS_LOOKUP_NOT_DONE; cell->dns_expiry = TIME64_MAX; - __clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags); } else { + ret = -ENOMEM; + vllist = afs_alloc_vlserver_list(0); + if (!vllist) + goto error; + vllist->source = DNS_RECORD_UNAVAILABLE; + vllist->status = DNS_LOOKUP_NOT_DONE; cell->dns_expiry = ktime_get_real_seconds(); } + rcu_assign_pointer(cell->vl_servers, vllist); + + cell->dns_source = vllist->source; + cell->dns_status = vllist->status; + smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ + _leave(" = %p", cell); return cell; parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); +error: kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -208,6 +218,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, { struct afs_cell *cell, *candidate, *cursor; struct rb_node *parent, **pp; + enum afs_cell_state state; int ret, n; _enter("%s,%s", name, vllist); @@ -267,18 +278,16 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, wait_for_cell: _debug("wait_for_cell"); - ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE); - smp_rmb(); - - switch (READ_ONCE(cell->state)) { - case AFS_CELL_FAILED: + wait_var_event(&cell->state, + ({ + state = smp_load_acquire(&cell->state); /* vs error */ + state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED; + })); + + /* Check the state obtained from the wait check. */ + if (state == AFS_CELL_FAILED) { ret = cell->error; goto error; - default: - _debug("weird %u %d", cell->state, cell->error); - goto error; - case AFS_CELL_ACTIVE: - break; } _leave(" = %p [cell]", cell); @@ -360,16 +369,46 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* * Update a cell's VL server address list from the DNS. */ -static void afs_update_cell(struct afs_cell *cell) +static int afs_update_cell(struct afs_cell *cell) { - struct afs_vlserver_list *vllist, *old; + struct afs_vlserver_list *vllist, *old = NULL, *p; unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl); unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl); time64_t now, expiry = 0; + int ret = 0; _enter("%s", cell->name); vllist = afs_dns_query(cell, &expiry); + if (IS_ERR(vllist)) { + ret = PTR_ERR(vllist); + + _debug("%s: fail %d", cell->name, ret); + if (ret == -ENOMEM) + goto out_wake; + + ret = -ENOMEM; + vllist = afs_alloc_vlserver_list(0); + if (!vllist) + goto out_wake; + + switch (ret) { + case -ENODATA: + case -EDESTADDRREQ: + vllist->status = DNS_LOOKUP_GOT_NOT_FOUND; + break; + case -EAGAIN: + case -ECONNREFUSED: + vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE; + break; + default: + vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE; + break; + } + } + + _debug("%s: got list %d %d", cell->name, vllist->source, vllist->status); + cell->dns_status = vllist->status; now = ktime_get_real_seconds(); if (min_ttl > max_ttl) @@ -379,48 +418,47 @@ static void afs_update_cell(struct afs_cell *cell) else if (expiry > now + max_ttl) expiry = now + max_ttl; - if (IS_ERR(vllist)) { - switch (PTR_ERR(vllist)) { - case -ENODATA: - case -EDESTADDRREQ: + _debug("%s: status %d", cell->name, vllist->status); + if (vllist->source == DNS_RECORD_UNAVAILABLE) { + switch (vllist->status) { + case DNS_LOOKUP_GOT_NOT_FOUND: /* The DNS said that the cell does not exist or there * weren't any addresses to be had. */ - set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); cell->dns_expiry = expiry; break; - case -EAGAIN: - case -ECONNREFUSED: + case DNS_LOOKUP_BAD: + case DNS_LOOKUP_GOT_LOCAL_FAILURE: + case DNS_LOOKUP_GOT_TEMP_FAILURE: + case DNS_LOOKUP_GOT_NS_FAILURE: default: - set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); cell->dns_expiry = now + 10; break; } - - cell->error = -EDESTADDRREQ; } else { - clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - - /* Exclusion on changing vl_addrs is achieved by a - * non-reentrant work item. - */ - old = rcu_dereference_protected(cell->vl_servers, true); - rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = expiry; - - if (old) - afs_put_vlserverlist(cell->net, old); } - if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) - wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET); + /* Replace the VL server list if the new record has servers or the old + * record doesn't. + */ + write_lock(&cell->vl_servers_lock); + p = rcu_dereference_protected(cell->vl_servers, true); + if (vllist->nr_servers > 0 || p->nr_servers == 0) { + rcu_assign_pointer(cell->vl_servers, vllist); + cell->dns_source = vllist->source; + old = p; + } + write_unlock(&cell->vl_servers_lock); + afs_put_vlserverlist(cell->net, old); - now = ktime_get_real_seconds(); - afs_set_cell_timer(cell->net, cell->dns_expiry - now); - _leave(""); +out_wake: + smp_store_release(&cell->dns_lookup_count, + cell->dns_lookup_count + 1); /* vs source/status */ + wake_up_var(&cell->dns_lookup_count); + _leave(" = %d", ret); + return ret; } /* @@ -491,8 +529,7 @@ void afs_put_cell(struct afs_net *net, struct afs_cell *cell) now = ktime_get_real_seconds(); cell->last_inactive = now; expire_delay = 0; - if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && - !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + if (cell->vl_servers->nr_servers) expire_delay = afs_cell_gc_delay; if (atomic_dec_return(&cell->usage) > 1) @@ -623,11 +660,13 @@ again: goto final_destruction; if (cell->state == AFS_CELL_FAILED) goto done; - cell->state = AFS_CELL_UNSET; + smp_store_release(&cell->state, AFS_CELL_UNSET); + wake_up_var(&cell->state); goto again; case AFS_CELL_UNSET: - cell->state = AFS_CELL_ACTIVATING; + smp_store_release(&cell->state, AFS_CELL_ACTIVATING); + wake_up_var(&cell->state); goto again; case AFS_CELL_ACTIVATING: @@ -635,28 +674,29 @@ again: if (ret < 0) goto activation_failed; - cell->state = AFS_CELL_ACTIVE; - smp_wmb(); - clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); - wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + smp_store_release(&cell->state, AFS_CELL_ACTIVE); + wake_up_var(&cell->state); goto again; case AFS_CELL_ACTIVE: if (atomic_read(&cell->usage) > 1) { - time64_t now = ktime_get_real_seconds(); - if (cell->dns_expiry <= now && net->live) - afs_update_cell(cell); + if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { + ret = afs_update_cell(cell); + if (ret < 0) + cell->error = ret; + } goto done; } - cell->state = AFS_CELL_DEACTIVATING; + smp_store_release(&cell->state, AFS_CELL_DEACTIVATING); + wake_up_var(&cell->state); goto again; case AFS_CELL_DEACTIVATING: - set_bit(AFS_CELL_FL_NOT_READY, &cell->flags); if (atomic_read(&cell->usage) > 1) goto reverse_deactivation; afs_deactivate_cell(net, cell); - cell->state = AFS_CELL_INACTIVE; + smp_store_release(&cell->state, AFS_CELL_INACTIVE); + wake_up_var(&cell->state); goto again; default: @@ -669,17 +709,13 @@ activation_failed: cell->error = ret; afs_deactivate_cell(net, cell); - cell->state = AFS_CELL_FAILED; - smp_wmb(); - if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags)) - wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */ + wake_up_var(&cell->state); goto again; reverse_deactivation: - cell->state = AFS_CELL_ACTIVE; - smp_wmb(); - clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); - wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + smp_store_release(&cell->state, AFS_CELL_ACTIVE); + wake_up_var(&cell->state); _leave(" [deact->act]"); return; @@ -739,11 +775,16 @@ void afs_manage_cells(struct work_struct *work) } if (usage == 1) { + struct afs_vlserver_list *vllist; time64_t expire_at = cell->last_inactive; - if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && - !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + read_lock(&cell->vl_servers_lock); + vllist = rcu_dereference_protected( + cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock)); + if (vllist->nr_servers > 0) expire_at += afs_cell_gc_delay; + read_unlock(&cell->vl_servers_lock); if (purging || expire_at <= now) sched_cell = true; else if (expire_at < next_manage) @@ -751,10 +792,8 @@ void afs_manage_cells(struct work_struct *work) } if (!purging) { - if (cell->dns_expiry <= now) + if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) sched_cell = true; - else if (cell->dns_expiry <= next_manage) - next_manage = cell->dns_expiry; } if (sched_cell) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 748090014519..01437cfe5432 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -213,7 +213,7 @@ static int afs_find_cm_server_by_peer(struct afs_call *call) return 0; } - call->cm_server = server; + call->server = server; return afs_record_cm_probe(call, server); } @@ -234,7 +234,7 @@ static int afs_find_cm_server_by_uuid(struct afs_call *call, return 0; } - call->cm_server = server; + call->server = server; return afs_record_cm_probe(call, server); } @@ -260,8 +260,8 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * server holds up change visibility till it receives our reply so as * to maintain cache coherency. */ - if (call->cm_server) - afs_break_callbacks(call->cm_server, call->count, call->request); + if (call->server) + afs_break_callbacks(call->server, call->count, call->request); afs_send_empty_reply(call); afs_put_call(call); @@ -376,10 +376,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, work); - _enter("{%p}", call->cm_server); + _enter("{%p}", call->server); - if (call->cm_server) - afs_init_callback_state(call->cm_server); + if (call->server) + afs_init_callback_state(call->server); afs_send_empty_reply(call); afs_put_call(call); _leave(""); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9a466be583d2..79d93a26759a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <linux/task_io_accounting_ops.h> #include "internal.h" +#include "afs_fs.h" #include "xdr_fs.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, @@ -102,8 +103,8 @@ struct afs_lookup_cookie { bool found; bool one_only; unsigned short nr_fids; - struct afs_file_status *statuses; - struct afs_callback *callbacks; + struct inode **inodes; + struct afs_status_cb *statuses; struct afs_fid fids[50]; }; @@ -638,12 +639,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, struct key *key) { struct afs_lookup_cookie *cookie; - struct afs_cb_interest *cbi = NULL; + struct afs_cb_interest *dcbi, *cbi = NULL; struct afs_super_info *as = dir->i_sb->s_fs_info; - struct afs_iget_data data; + struct afs_status_cb *scb; + struct afs_iget_data iget_data; struct afs_fs_cursor fc; - struct afs_vnode *dvnode = AFS_FS_I(dir); - struct inode *inode = NULL; + struct afs_server *server; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; + struct inode *inode = NULL, *ti; int ret, i; _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); @@ -657,10 +660,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */ read_seqlock_excl(&dvnode->cb_lock); - if (dvnode->cb_interest && - dvnode->cb_interest->server && - test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags)) - cookie->one_only = true; + dcbi = rcu_dereference_protected(dvnode->cb_interest, + lockdep_is_held(&dvnode->cb_lock.lock)); + if (dcbi) { + server = dcbi->server; + if (server && + test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags)) + cookie->one_only = true; + } read_sequnlock_excl(&dvnode->cb_lock); for (i = 0; i < 50; i++) @@ -678,24 +685,43 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, goto out; /* Check to see if we already have an inode for the primary fid. */ - data.volume = dvnode->volume; - data.fid = cookie->fids[0]; - inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, afs_iget5_test, &data); + iget_data.fid = cookie->fids[0]; + iget_data.volume = dvnode->volume; + iget_data.cb_v_break = dvnode->volume->cb_v_break; + iget_data.cb_s_break = 0; + inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, + afs_iget5_test, &iget_data); if (inode) goto out; /* Need space for examining all the selected files */ inode = ERR_PTR(-ENOMEM); - cookie->statuses = kcalloc(cookie->nr_fids, sizeof(struct afs_file_status), - GFP_KERNEL); + cookie->statuses = kvcalloc(cookie->nr_fids, sizeof(struct afs_status_cb), + GFP_KERNEL); if (!cookie->statuses) goto out; - cookie->callbacks = kcalloc(cookie->nr_fids, sizeof(struct afs_callback), - GFP_KERNEL); - if (!cookie->callbacks) + cookie->inodes = kcalloc(cookie->nr_fids, sizeof(struct inode *), + GFP_KERNEL); + if (!cookie->inodes) goto out_s; + for (i = 1; i < cookie->nr_fids; i++) { + scb = &cookie->statuses[i]; + + /* Find any inodes that already exist and get their + * callback counters. + */ + iget_data.fid = cookie->fids[i]; + ti = ilookup5_nowait(dir->i_sb, iget_data.fid.vnode, + afs_iget5_test, &iget_data); + if (!IS_ERR_OR_NULL(ti)) { + vnode = AFS_FS_I(ti); + scb->cb_break = afs_calc_vnode_cb_break(vnode); + cookie->inodes[i] = ti; + } + } + /* Try FS.InlineBulkStatus first. Abort codes for the individual * lookups contained therein are stored in the reply without aborting * the whole operation. @@ -704,7 +730,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, goto no_inline_bulk_status; inode = ERR_PTR(-ERESTARTSYS); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { if (test_bit(AFS_SERVER_FL_NO_IBULK, &fc.cbi->server->flags)) { @@ -712,11 +738,12 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, fc.ac.error = -ECONNABORTED; break; } + iget_data.cb_v_break = dvnode->volume->cb_v_break; + iget_data.cb_s_break = fc.cbi->server->cb_s_break; afs_fs_inline_bulk_status(&fc, afs_v2net(dvnode), cookie->fids, cookie->statuses, - cookie->callbacks, cookie->nr_fids, NULL); } @@ -737,15 +764,16 @@ no_inline_bulk_status: * any of the lookups fails - so, for the moment, revert to * FS.FetchStatus for just the primary fid. */ - cookie->nr_fids = 1; inode = ERR_PTR(-ERESTARTSYS); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { while (afs_select_fileserver(&fc)) { + iget_data.cb_v_break = dvnode->volume->cb_v_break; + iget_data.cb_s_break = fc.cbi->server->cb_s_break; + scb = &cookie->statuses[0]; afs_fs_fetch_status(&fc, afs_v2net(dvnode), cookie->fids, - cookie->statuses, - cookie->callbacks, + scb, NULL); } @@ -757,26 +785,36 @@ no_inline_bulk_status: if (IS_ERR(inode)) goto out_c; - for (i = 0; i < cookie->nr_fids; i++) - cookie->statuses[i].abort_code = 0; - success: /* Turn all the files into inodes and save the first one - which is the * one we actually want. */ - if (cookie->statuses[0].abort_code != 0) - inode = ERR_PTR(afs_abort_to_error(cookie->statuses[0].abort_code)); + scb = &cookie->statuses[0]; + if (scb->status.abort_code != 0) + inode = ERR_PTR(afs_abort_to_error(scb->status.abort_code)); for (i = 0; i < cookie->nr_fids; i++) { - struct inode *ti; + struct afs_status_cb *scb = &cookie->statuses[i]; + + if (!scb->have_status && !scb->have_error) + continue; + + if (cookie->inodes[i]) { + afs_vnode_commit_status(&fc, AFS_FS_I(cookie->inodes[i]), + scb->cb_break, NULL, scb); + continue; + } - if (cookie->statuses[i].abort_code != 0) + if (scb->status.abort_code != 0) continue; - ti = afs_iget(dir->i_sb, key, &cookie->fids[i], - &cookie->statuses[i], - &cookie->callbacks[i], - cbi, dvnode); + iget_data.fid = cookie->fids[i]; + ti = afs_iget(dir->i_sb, key, &iget_data, scb, cbi, dvnode); + if (!IS_ERR(ti)) + afs_cache_permit(AFS_FS_I(ti), key, + 0 /* Assume vnode->cb_break is 0 */ + + iget_data.cb_v_break, + scb); if (i == 0) { inode = ti; } else { @@ -787,9 +825,13 @@ success: out_c: afs_put_cb_interest(afs_v2net(dvnode), cbi); - kfree(cookie->callbacks); + if (cookie->inodes) { + for (i = 0; i < cookie->nr_fids; i++) + iput(cookie->inodes[i]); + kfree(cookie->inodes); + } out_s: - kfree(cookie->statuses); + kvfree(cookie->statuses); out: kfree(cookie); return inode; @@ -1114,9 +1156,8 @@ void afs_d_release(struct dentry *dentry) */ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, struct dentry *new_dentry, - struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_callback *newcb) + struct afs_iget_data *new_data, + struct afs_status_cb *new_scb) { struct afs_vnode *vnode; struct inode *inode; @@ -1125,7 +1166,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, return; inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key, - newfid, newstatus, newcb, fc->cbi, fc->vnode); + new_data, new_scb, fc->cbi, fc->vnode); if (IS_ERR(inode)) { /* ENOMEM or EINTR at a really inconvenient time - just abandon * the new directory on the server. @@ -1136,22 +1177,29 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, vnode = AFS_FS_I(inode); set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); - afs_vnode_commit_status(fc, vnode, 0); + if (fc->ac.error == 0) + afs_cache_permit(vnode, fc->key, vnode->cb_break, new_scb); d_instantiate(new_dentry, inode); } +static void afs_prep_for_new_inode(struct afs_fs_cursor *fc, + struct afs_iget_data *iget_data) +{ + iget_data->volume = fc->vnode->volume; + iget_data->cb_v_break = fc->vnode->volume->cb_v_break; + iget_data->cb_s_break = fc->cbi->server->cb_s_break; +} + /* * create a directory on an AFS filesystem */ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct afs_file_status newstatus; + struct afs_iget_data iget_data; + struct afs_status_cb *scb; struct afs_fs_cursor fc; - struct afs_callback newcb; struct afs_vnode *dvnode = AFS_FS_I(dir); - struct afs_fid newfid; struct key *key; - u64 data_version = dvnode->status.data_version; int ret; mode |= S_IFDIR; @@ -1159,23 +1207,32 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) _enter("{%llx:%llu},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); + ret = -ENOMEM; + scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); - goto error; + goto error_scb; } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { + afs_dataversion_t data_version = dvnode->status.data_version + 1; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_create(&fc, dentry->d_name.name, mode, data_version, - &newfid, &newstatus, &newcb); + afs_prep_for_new_inode(&fc, &iget_data); + afs_fs_create(&fc, dentry->d_name.name, mode, + &scb[0], &iget_data.fid, &scb[1]); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); - afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + afs_check_for_remote_deletion(&fc, dvnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &data_version, &scb[0]); + afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) goto error_key; @@ -1185,15 +1242,18 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (ret == 0 && test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_create); key_put(key); + kfree(scb); _leave(" = 0"); return 0; error_key: key_put(key); +error_scb: + kfree(scb); error: d_drop(dentry); _leave(" = %d", ret); @@ -1220,15 +1280,19 @@ static void afs_dir_remove_subdir(struct dentry *dentry) */ static int afs_rmdir(struct inode *dir, struct dentry *dentry) { + struct afs_status_cb *scb; struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; - u64 data_version = dvnode->status.data_version; int ret; _enter("{%llx:%llu},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -1250,14 +1314,16 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { + afs_dataversion_t data_version = dvnode->status.data_version + 1; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_remove(&fc, vnode, dentry->d_name.name, true, - data_version); + afs_fs_remove(&fc, vnode, dentry->d_name.name, true, scb); } - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &data_version, scb); ret = afs_end_vnode_operation(&fc); if (ret == 0) { afs_dir_remove_subdir(dentry); @@ -1272,6 +1338,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) error_key: key_put(key); error: + kfree(scb); return ret; } @@ -1285,32 +1352,27 @@ error: * However, if we didn't have a callback promise outstanding, or it was * outstanding on a different server, then it won't break it either... */ -int afs_dir_remove_link(struct dentry *dentry, struct key *key, - unsigned long d_version_before, - unsigned long d_version_after) +static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry, + struct key *key) { - bool dir_valid; int ret = 0; - /* There were no intervening changes on the server if the version - * number we got back was incremented by exactly 1. - */ - dir_valid = (d_version_after == d_version_before + 1); - if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { /* Already done */ - } else if (dir_valid) { + } else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + write_seqlock(&vnode->cb_lock); drop_nlink(&vnode->vfs_inode); if (vnode->vfs_inode.i_nlink == 0) { set_bit(AFS_VNODE_DELETED, &vnode->flags); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + __afs_break_callback(vnode); } + write_sequnlock(&vnode->cb_lock); ret = 0; } else { - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + afs_break_callback(vnode); if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) kdebug("AFS_VNODE_DELETED"); @@ -1331,11 +1393,10 @@ int afs_dir_remove_link(struct dentry *dentry, struct key *key, static int afs_unlink(struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; - unsigned long d_version = (unsigned long)dentry->d_fsdata; bool need_rehash = false; - u64 data_version = dvnode->status.data_version; int ret; _enter("{%llx:%llu},{%pd}", @@ -1344,10 +1405,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) if (dentry->d_name.len >= AFSNAMEMAX) return -ENAMETOOLONG; + ret = -ENOMEM; + scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); - goto error; + goto error_scb; } /* Try to make sure we have a callback promise on the victim. */ @@ -1374,30 +1440,34 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { + afs_dataversion_t data_version = dvnode->status.data_version + 1; + afs_dataversion_t data_version_2 = vnode->status.data_version; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); + fc.cb_break_2 = afs_calc_vnode_cb_break(vnode); if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) && !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) { yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name, - data_version); + &scb[0], &scb[1]); if (fc.ac.error != -ECONNABORTED || fc.ac.abort_code != RXGEN_OPCODE) continue; set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags); } - afs_fs_remove(&fc, vnode, dentry->d_name.name, false, - data_version); + afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]); } - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &data_version, &scb[0]); + afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, + &data_version_2, &scb[1]); ret = afs_end_vnode_operation(&fc); - if (ret == 0) - ret = afs_dir_remove_link( - dentry, key, d_version, - (unsigned long)dvnode->status.data_version); + if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) + ret = afs_dir_remove_link(dvnode, dentry, key); if (ret == 0 && test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_remove(dvnode, &dentry->d_name, @@ -1409,6 +1479,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) error_key: key_put(key); +error_scb: + kfree(scb); error: _leave(" = %d", ret); return ret; @@ -1420,13 +1492,11 @@ error: static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { + struct afs_iget_data iget_data; struct afs_fs_cursor fc; - struct afs_file_status newstatus; - struct afs_callback newcb; + struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); - struct afs_fid newfid; struct key *key; - u64 data_version = dvnode->status.data_version; int ret; mode |= S_IFREG; @@ -1444,17 +1514,26 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto error; } + ret = -ENOMEM; + scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + goto error_scb; + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { + afs_dataversion_t data_version = dvnode->status.data_version + 1; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_create(&fc, dentry->d_name.name, mode, data_version, - &newfid, &newstatus, &newcb); + afs_prep_for_new_inode(&fc, &iget_data); + afs_fs_create(&fc, dentry->d_name.name, mode, + &scb[0], &iget_data.fid, &scb[1]); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); - afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + afs_check_for_remote_deletion(&fc, dvnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &data_version, &scb[0]); + afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) goto error_key; @@ -1463,13 +1542,16 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_create); + kfree(scb); key_put(key); _leave(" = 0"); return 0; +error_scb: + kfree(scb); error_key: key_put(key); error: @@ -1485,15 +1567,12 @@ static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; - struct afs_vnode *dvnode, *vnode; + struct afs_status_cb *scb; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); struct key *key; - u64 data_version; int ret; - vnode = AFS_FS_I(d_inode(from)); - dvnode = AFS_FS_I(dir); - data_version = dvnode->status.data_version; - _enter("{%llx:%llu},{%llx:%llu},{%pd}", vnode->fid.vid, vnode->fid.vnode, dvnode->fid.vid, dvnode->fid.vnode, @@ -1503,14 +1582,21 @@ static int afs_link(struct dentry *from, struct inode *dir, if (dentry->d_name.len >= AFSNAMEMAX) goto error; + ret = -ENOMEM; + scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); - goto error; + goto error_scb; } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { + afs_dataversion_t data_version = dvnode->status.data_version + 1; + if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); goto error_key; @@ -1519,11 +1605,14 @@ static int afs_link(struct dentry *from, struct inode *dir, while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); fc.cb_break_2 = afs_calc_vnode_cb_break(vnode); - afs_fs_link(&fc, vnode, dentry->d_name.name, data_version); + afs_fs_link(&fc, vnode, dentry->d_name.name, + &scb[0], &scb[1]); } - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); - afs_vnode_commit_status(&fc, vnode, fc.cb_break_2); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &data_version, &scb[0]); + afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, + NULL, &scb[1]); ihold(&vnode->vfs_inode); d_instantiate(dentry, &vnode->vfs_inode); @@ -1540,11 +1629,14 @@ static int afs_link(struct dentry *from, struct inode *dir, afs_edit_dir_for_link); key_put(key); + kfree(scb); _leave(" = 0"); return 0; error_key: key_put(key); +error_scb: + kfree(scb); error: d_drop(dentry); _leave(" = %d", ret); @@ -1557,12 +1649,11 @@ error: static int afs_symlink(struct inode *dir, struct dentry *dentry, const char *content) { + struct afs_iget_data iget_data; struct afs_fs_cursor fc; - struct afs_file_status newstatus; + struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); - struct afs_fid newfid; struct key *key; - u64 data_version = dvnode->status.data_version; int ret; _enter("{%llx:%llu},{%pd},%s", @@ -1577,24 +1668,32 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, if (strlen(content) >= AFSPATHMAX) goto error; + ret = -ENOMEM; + scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); - goto error; + goto error_scb; } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { + afs_dataversion_t data_version = dvnode->status.data_version + 1; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_symlink(&fc, dentry->d_name.name, - content, data_version, - &newfid, &newstatus); + afs_prep_for_new_inode(&fc, &iget_data); + afs_fs_symlink(&fc, dentry->d_name.name, content, + &scb[0], &iget_data.fid, &scb[1]); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); - afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL); + afs_check_for_remote_deletion(&fc, dvnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &data_version, &scb[0]); + afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) goto error_key; @@ -1603,15 +1702,18 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, } if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_symlink); key_put(key); + kfree(scb); _leave(" = 0"); return 0; error_key: key_put(key); +error_scb: + kfree(scb); error: d_drop(dentry); _leave(" = %d", ret); @@ -1626,11 +1728,11 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned int flags) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; struct dentry *tmp = NULL, *rehash = NULL; struct inode *new_inode; struct key *key; - u64 orig_data_version, new_data_version; bool new_negative = d_is_negative(new_dentry); int ret; @@ -1644,8 +1746,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, vnode = AFS_FS_I(d_inode(old_dentry)); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); - orig_data_version = orig_dvnode->status.data_version; - new_data_version = new_dvnode->status.data_version; _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, @@ -1653,10 +1753,15 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dvnode->fid.vid, new_dvnode->fid.vnode, new_dentry); + ret = -ENOMEM; + scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + goto error; + key = afs_request_key(orig_dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); - goto error; + goto error_scb; } /* For non-directories, check whether the target is busy and if so, @@ -1690,31 +1795,43 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry = tmp; rehash = NULL; new_negative = true; - orig_data_version = orig_dvnode->status.data_version; - new_data_version = new_dvnode->status.data_version; } } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) { + if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { + afs_dataversion_t orig_data_version; + afs_dataversion_t new_data_version; + struct afs_status_cb *new_scb = &scb[1]; + + orig_data_version = orig_dvnode->status.data_version + 1; + if (orig_dvnode != new_dvnode) { if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); goto error_rehash; } + new_data_version = new_dvnode->status.data_version; + } else { + new_data_version = orig_data_version; + new_scb = &scb[0]; } + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode); fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); afs_fs_rename(&fc, old_dentry->d_name.name, new_dvnode, new_dentry->d_name.name, - orig_data_version, new_data_version); + &scb[0], new_scb); } - afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break); - afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2); - if (orig_dvnode != new_dvnode) + afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break, + &orig_data_version, &scb[0]); + if (new_dvnode != orig_dvnode) { + afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2, + &new_data_version, &scb[1]); mutex_unlock(&new_dvnode->io_lock); + } ret = afs_end_vnode_operation(&fc); if (ret < 0) goto error_rehash; @@ -1754,6 +1871,8 @@ error_tmp: if (tmp) dput(tmp); key_put(key); +error_scb: + kfree(scb); error: _leave(" = %d", ret); return ret; diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index f6f89fdab6b2..28f4aa015229 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -24,21 +24,28 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode struct key *key) { struct afs_fs_cursor fc; - u64 dir_data_version = dvnode->status.data_version; + struct afs_status_cb *scb; int ret = -ERESTARTSYS; _enter("%pd,%pd", old, new); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + trace_afs_silly_rename(vnode, false); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { + afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_rename(&fc, old->d_name.name, dvnode, new->d_name.name, - dir_data_version, dir_data_version); + scb, scb); } - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &dir_data_version, scb); ret = afs_end_vnode_operation(&fc); } @@ -64,6 +71,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode fsnotify_nameremove(old, 0); } + kfree(scb); _leave(" = %d", ret); return ret; } @@ -143,31 +151,37 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode struct dentry *dentry, struct key *key) { struct afs_fs_cursor fc; - u64 dir_data_version = dvnode->status.data_version; + struct afs_status_cb *scb; int ret = -ERESTARTSYS; _enter(""); + scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + trace_afs_silly_rename(vnode, true); - if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (afs_begin_vnode_operation(&fc, dvnode, key, false)) { + afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) && !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) { yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name, - dir_data_version); + &scb[0], &scb[1]); if (fc.ac.error != -ECONNABORTED || fc.ac.abort_code != RXGEN_OPCODE) continue; set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags); } - afs_fs_remove(&fc, vnode, dentry->d_name.name, false, - dir_data_version); + afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]); } - afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break, + &dir_data_version, &scb[0]); ret = afs_end_vnode_operation(&fc); if (ret == 0) { drop_nlink(&vnode->vfs_inode); @@ -182,6 +196,7 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode afs_edit_dir_for_unlink); } + kfree(scb); _leave(" = %d", ret); return ret; } diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index a9ba81ddf154..af1689d1f32e 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry) return 0; } - ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL); + ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; @@ -261,8 +261,7 @@ int afs_dynroot_populate(struct super_block *sb) struct afs_net *net = afs_sb2net(sb); int ret; - if (mutex_lock_interruptible(&net->proc_cells_lock) < 0) - return -ERESTARTSYS; + mutex_lock(&net->proc_cells_lock); net->dynroot_sb = sb; hlist_for_each_entry(cell, &net->proc_cells, proc_link) { diff --git a/fs/afs/file.c b/fs/afs/file.c index e8d6619890a9..11e69c5fb7ab 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -170,11 +170,12 @@ int afs_release(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_file *af = file->private_data; + int ret = 0; _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); if ((file->f_mode & FMODE_WRITE)) - return vfs_fsync(file, 0); + ret = vfs_fsync(file, 0); file->private_data = NULL; if (af->wb) @@ -182,8 +183,8 @@ int afs_release(struct inode *inode, struct file *file) key_put(af->key); kfree(af); afs_prune_wb_keys(vnode); - _leave(" = 0"); - return 0; + _leave(" = %d", ret); + return ret; } /* @@ -227,6 +228,7 @@ static void afs_file_readpage_read_complete(struct page *page, int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; int ret; _enter("%s{%llx:%llu.%u},%x,,,", @@ -236,15 +238,22 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de vnode->fid.unique, key_serial(key)); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { + afs_dataversion_t data_version = vnode->status.data_version; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_fetch_data(&fc, desc); + afs_fs_fetch_data(&fc, scb, desc); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_check_for_remote_deletion(&fc, vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); ret = afs_end_vnode_operation(&fc); } @@ -254,6 +263,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de &afs_v2net(vnode)->n_fetch_bytes); } + kfree(scb); _leave(" = %d", ret); return ret; } @@ -404,10 +414,10 @@ static int afs_readpage(struct file *file, struct page *page) /* * Make pages available as they're filled. */ -static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req) +static void afs_readpages_page_done(struct afs_read *req) { #ifdef CONFIG_AFS_FSCACHE - struct afs_vnode *vnode = call->reply[0]; + struct afs_vnode *vnode = req->vnode; #endif struct page *page = req->pages[req->index]; @@ -461,6 +471,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, return -ENOMEM; refcount_set(&req->usage, 1); + req->vnode = vnode; req->page_done = afs_readpages_page_done; req->pos = first->index; req->pos <<= PAGE_SHIFT; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index adc88eff7849..ed3ac03682d7 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -41,9 +41,6 @@ void afs_lock_may_be_available(struct afs_vnode *vnode) { _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); - if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB) - return; - spin_lock(&vnode->lock); if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) afs_next_locker(vnode, 0); @@ -77,7 +74,7 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode) */ void afs_lock_op_done(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; + struct afs_vnode *vnode = call->lvnode; if (call->error == 0) { spin_lock(&vnode->lock); @@ -185,6 +182,7 @@ static void afs_kill_lockers_enoent(struct afs_vnode *vnode) static int afs_set_lock(struct afs_vnode *vnode, struct key *key, afs_lock_type_t type) { + struct afs_status_cb *scb; struct afs_fs_cursor fc; int ret; @@ -195,18 +193,23 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, vnode->fid.unique, key_serial(key), type); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_set_lock(&fc, type); + afs_fs_set_lock(&fc, type, scb); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_check_for_remote_deletion(&fc, vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb); ret = afs_end_vnode_operation(&fc); } + kfree(scb); _leave(" = %d", ret); return ret; } @@ -216,6 +219,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, */ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) { + struct afs_status_cb *scb; struct afs_fs_cursor fc; int ret; @@ -226,18 +230,23 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) vnode->fid.unique, key_serial(key)); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, false)) { while (afs_select_current_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_extend_lock(&fc); + afs_fs_extend_lock(&fc, scb); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_check_for_remote_deletion(&fc, vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb); ret = afs_end_vnode_operation(&fc); } + kfree(scb); _leave(" = %d", ret); return ret; } @@ -247,6 +256,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) */ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) { + struct afs_status_cb *scb; struct afs_fs_cursor fc; int ret; @@ -257,18 +267,23 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) vnode->fid.unique, key_serial(key)); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, false)) { while (afs_select_current_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_release_lock(&fc); + afs_fs_release_lock(&fc, scb); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_check_for_remote_deletion(&fc, vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb); ret = afs_end_vnode_operation(&fc); } + kfree(scb); _leave(" = %d", ret); return ret; } @@ -736,7 +751,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_fetch_status(vnode, key, false); + ret = afs_fetch_status(vnode, key, false, NULL); if (ret < 0) goto error; diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 5d3abde52a0f..9b7266209343 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -33,8 +33,8 @@ static bool afs_fs_probe_done(struct afs_server *server) void afs_fileserver_probe_result(struct afs_call *call) { struct afs_addr_list *alist = call->alist; - struct afs_server *server = call->reply[0]; - unsigned int server_index = (long)call->reply[1]; + struct afs_server *server = call->server; + unsigned int server_index = call->server_index; unsigned int index = call->addr_ix; unsigned int rtt = UINT_MAX; bool have_result = false; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1296f5dc4c1e..48298408d6ac 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -60,78 +60,17 @@ static void xdr_dump_bad(const __be32 *bp) } /* - * Update the core inode struct from a returned status record. - */ -void afs_update_inode_from_status(struct afs_vnode *vnode, - struct afs_file_status *status, - const afs_dataversion_t *expected_version, - u8 flags) -{ - struct timespec64 t; - umode_t mode; - - t = status->mtime_client; - vnode->vfs_inode.i_ctime = t; - vnode->vfs_inode.i_mtime = t; - vnode->vfs_inode.i_atime = t; - - if (flags & (AFS_VNODE_META_CHANGED | AFS_VNODE_NOT_YET_SET)) { - vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner); - vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group); - set_nlink(&vnode->vfs_inode, status->nlink); - - mode = vnode->vfs_inode.i_mode; - mode &= ~S_IALLUGO; - mode |= status->mode; - barrier(); - vnode->vfs_inode.i_mode = mode; - } - - if (!(flags & AFS_VNODE_NOT_YET_SET)) { - if (expected_version && - *expected_version != status->data_version) { - _debug("vnode modified %llx on {%llx:%llu} [exp %llx]", - (unsigned long long) status->data_version, - vnode->fid.vid, vnode->fid.vnode, - (unsigned long long) *expected_version); - vnode->invalid_before = status->data_version; - if (vnode->status.type == AFS_FTYPE_DIR) { - if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) - afs_stat_v(vnode, n_inval); - } else { - set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); - } - } else if (vnode->status.type == AFS_FTYPE_DIR) { - /* Expected directory change is handled elsewhere so - * that we can locally edit the directory and save on a - * download. - */ - if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) - flags &= ~AFS_VNODE_DATA_CHANGED; - } - } - - if (flags & (AFS_VNODE_DATA_CHANGED | AFS_VNODE_NOT_YET_SET)) { - inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - i_size_write(&vnode->vfs_inode, status->size); - } -} - -/* * decode an AFSFetchStatus block */ -static int xdr_decode_AFSFetchStatus(struct afs_call *call, - const __be32 **_bp, - struct afs_file_status *status, - struct afs_vnode *vnode, - const afs_dataversion_t *expected_version, - struct afs_read *read_req) +static int xdr_decode_AFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; + struct afs_file_status *status = &scb->status; bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; - u8 flags = 0; abort_code = ntohl(xdr->abort_code); @@ -144,6 +83,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, * case. */ status->abort_code = abort_code; + scb->have_error = true; return 0; } @@ -161,44 +101,25 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, case AFS_FTYPE_FILE: case AFS_FTYPE_DIR: case AFS_FTYPE_SYMLINK: - if (type != status->type && - vnode && - !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n", - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - status->type, type); - goto bad; - } status->type = type; break; default: goto bad; } -#define EXTRACT_M(FIELD) \ - do { \ - u32 x = ntohl(xdr->FIELD); \ - if (status->FIELD != x) { \ - flags |= AFS_VNODE_META_CHANGED; \ - status->FIELD = x; \ - } \ - } while (0) - - EXTRACT_M(nlink); - EXTRACT_M(author); - EXTRACT_M(owner); - EXTRACT_M(caller_access); /* call ticket dependent */ - EXTRACT_M(anon_access); - EXTRACT_M(mode); - EXTRACT_M(group); + status->nlink = ntohl(xdr->nlink); + status->author = ntohl(xdr->author); + status->owner = ntohl(xdr->owner); + status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */ + status->anon_access = ntohl(xdr->anon_access); + status->mode = ntohl(xdr->mode) & S_IALLUGO; + status->group = ntohl(xdr->group); + status->lock_count = ntohl(xdr->lock_count); status->mtime_client.tv_sec = ntohl(xdr->mtime_client); status->mtime_client.tv_nsec = 0; status->mtime_server.tv_sec = ntohl(xdr->mtime_server); status->mtime_server.tv_nsec = 0; - status->lock_count = ntohl(xdr->lock_count); size = (u64)ntohl(xdr->size_lo); size |= (u64)ntohl(xdr->size_hi) << 32; @@ -206,25 +127,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, data_version = (u64)ntohl(xdr->data_version_lo); data_version |= (u64)ntohl(xdr->data_version_hi) << 32; - if (data_version != status->data_version) { - status->data_version = data_version; - flags |= AFS_VNODE_DATA_CHANGED; - } - - if (read_req) { - read_req->data_version = data_version; - read_req->file_size = size; - } + status->data_version = data_version; + scb->have_status = true; *_bp = (const void *)*_bp + sizeof(*xdr); - - if (vnode) { - if (test_bit(AFS_VNODE_UNSET, &vnode->flags)) - flags |= AFS_VNODE_NOT_YET_SET; - afs_update_inode_from_status(vnode, status, expected_version, - flags); - } - return 0; bad: @@ -232,77 +138,22 @@ bad: return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); } -/* - * Decode the file status. We need to lock the target vnode if we're going to - * update its status so that stat() sees the attributes update atomically. - */ -static int afs_decode_status(struct afs_call *call, - const __be32 **_bp, - struct afs_file_status *status, - struct afs_vnode *vnode, - const afs_dataversion_t *expected_version, - struct afs_read *read_req) +static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) { - int ret; - - if (!vnode) - return xdr_decode_AFSFetchStatus(call, _bp, status, vnode, - expected_version, read_req); - - write_seqlock(&vnode->cb_lock); - ret = xdr_decode_AFSFetchStatus(call, _bp, status, vnode, - expected_version, read_req); - write_sequnlock(&vnode->cb_lock); - return ret; + return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry; } -/* - * decode an AFSCallBack block - */ -static void xdr_decode_AFSCallBack(struct afs_call *call, - struct afs_vnode *vnode, - const __be32 **_bp) +static void xdr_decode_AFSCallBack(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { - struct afs_cb_interest *old, *cbi = call->cbi; + struct afs_callback *cb = &scb->callback; const __be32 *bp = *_bp; - u32 cb_expiry; - - write_seqlock(&vnode->cb_lock); - - if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { - vnode->cb_version = ntohl(*bp++); - cb_expiry = ntohl(*bp++); - vnode->cb_type = ntohl(*bp++); - vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); - old = vnode->cb_interest; - if (old != call->cbi) { - vnode->cb_interest = cbi; - cbi = old; - } - set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - } else { - bp += 3; - } - write_sequnlock(&vnode->cb_lock); - call->cbi = cbi; - *_bp = bp; -} - -static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry) -{ - return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC); -} - -static void xdr_decode_AFSCallBack_raw(struct afs_call *call, - const __be32 **_bp, - struct afs_callback *cb) -{ - const __be32 *bp = *_bp; - - cb->version = ntohl(*bp++); + bp++; /* version */ cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++)); - cb->type = ntohl(*bp++); + bp++; /* type */ + scb->have_cb = true; *_bp = bp; } @@ -395,7 +246,6 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, */ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -403,16 +253,13 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) if (ret < 0) return ret; - _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - xdr_decode_AFSCallBack(call, vnode, &bp); - xdr_decode_AFSVolSync(&bp, call->reply[1]); + xdr_decode_AFSCallBack(&bp, call, call->out_scb); + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -431,8 +278,8 @@ static const struct afs_call_type afs_RXFSFetchStatus_vnode = { /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync, - bool new_inode) +int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb, + struct afs_volsync *volsync) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -440,7 +287,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_fetch_file_status(fc, volsync, new_inode); + return yfs_fs_fetch_file_status(fc, scb, volsync); _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); @@ -453,10 +300,8 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy } call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = volsync; - call->expected_version = new_inode ? 1 : vnode->status.data_version; - call->want_reply_time = true; + call->out_scb = scb; + call->out_volsync = volsync; /* marshall the parameters */ bp = call->request; @@ -465,10 +310,10 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy bp[2] = htonl(vnode->fid.vnode); bp[3] = htonl(vnode->fid.unique); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -478,8 +323,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy */ static int afs_deliver_fs_fetch_data(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; - struct afs_read *req = call->reply[2]; + struct afs_read *req = call->read_request; const __be32 *bp; unsigned int size; int ret; @@ -541,7 +385,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (req->offset == PAGE_SIZE) { req->offset = 0; if (req->page_done) - req->page_done(call, req); + req->page_done(req); req->index++; if (req->remain > 0) goto begin_page; @@ -575,12 +419,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &vnode->status.data_version, req); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - xdr_decode_AFSCallBack(call, vnode, &bp); - xdr_decode_AFSVolSync(&bp, call->reply[1]); + xdr_decode_AFSCallBack(&bp, call, call->out_scb); + xdr_decode_AFSVolSync(&bp, call->out_volsync); + + req->data_version = call->out_scb->status.data_version; + req->file_size = call->out_scb->status.size; call->unmarshall++; @@ -593,7 +439,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) zero_user_segment(req->pages[req->index], req->offset, PAGE_SIZE); if (req->page_done) - req->page_done(call, req); + req->page_done(req); req->offset = 0; } @@ -603,7 +449,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) static void afs_fetch_data_destructor(struct afs_call *call) { - struct afs_read *req = call->reply[2]; + struct afs_read *req = call->read_request; afs_put_read(req); afs_flat_call_destructor(call); @@ -629,7 +475,9 @@ static const struct afs_call_type afs_RXFSFetchData64 = { /* * fetch data from a very large file */ -static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) +static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, + struct afs_status_cb *scb, + struct afs_read *req) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -643,11 +491,9 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = NULL; /* volsync */ - call->reply[2] = req; - call->expected_version = vnode->status.data_version; - call->want_reply_time = true; + call->out_scb = scb; + call->out_volsync = NULL; + call->read_request = req; /* marshall the parameters */ bp = call->request; @@ -661,9 +507,9 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) bp[7] = htonl(lower_32_bits(req->len)); refcount_inc(&req->usage); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -671,7 +517,9 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) /* * fetch data from a file */ -int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) +int afs_fs_fetch_data(struct afs_fs_cursor *fc, + struct afs_status_cb *scb, + struct afs_read *req) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -679,12 +527,12 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_fetch_data(fc, req); + return yfs_fs_fetch_data(fc, scb, req); if (upper_32_bits(req->pos) || upper_32_bits(req->len) || upper_32_bits(req->pos + req->len)) - return afs_fs_fetch_data64(fc, req); + return afs_fs_fetch_data64(fc, scb, req); _enter(""); @@ -693,11 +541,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = NULL; /* volsync */ - call->reply[2] = req; - call->expected_version = vnode->status.data_version; - call->want_reply_time = true; + call->out_scb = scb; + call->out_volsync = NULL; + call->read_request = req; /* marshall the parameters */ bp = call->request; @@ -709,9 +555,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) bp[5] = htonl(lower_32_bits(req->len)); refcount_inc(&req->usage); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -721,28 +567,24 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) */ static int afs_deliver_fs_create_vnode(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; - _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call); if (ret < 0) return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->reply[1]); - ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + xdr_decode_AFSFid(&bp, call->out_fid); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]); - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ + xdr_decode_AFSCallBack(&bp, call, call->out_scb); + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -771,24 +613,23 @@ static const struct afs_call_type afs_RXFSMakeDir = { int afs_fs_create(struct afs_fs_cursor *fc, const char *name, umode_t mode, - u64 current_data_version, + struct afs_status_cb *dvnode_scb, struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_callback *newcb) + struct afs_status_cb *new_scb) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, padsz; __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){ if (S_ISDIR(mode)) - return yfs_fs_make_dir(fc, name, mode, current_data_version, - newfid, newstatus, newcb); + return yfs_fs_make_dir(fc, name, mode, dvnode_scb, + newfid, new_scb); else - return yfs_fs_create_file(fc, name, mode, current_data_version, - newfid, newstatus, newcb); + return yfs_fs_create_file(fc, name, mode, dvnode_scb, + newfid, new_scb); } _enter(""); @@ -804,19 +645,16 @@ int afs_fs_create(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = newfid; - call->reply[2] = newstatus; - call->reply[3] = newcb; - call->expected_version = current_data_version + 1; - call->want_reply_time = true; + call->out_dir_scb = dvnode_scb; + call->out_fid = newfid; + call->out_scb = new_scb; /* marshall the parameters */ bp = call->request; *bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(dvnode->fid.vid); + *bp++ = htonl(dvnode->fid.vnode); + *bp++ = htonl(dvnode->fid.unique); *bp++ = htonl(namesz); memcpy(bp, name, namesz); bp = (void *) bp + namesz; @@ -825,41 +663,38 @@ int afs_fs_create(struct afs_fs_cursor *fc, bp = (void *) bp + padsz; } *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); - *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ + *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &vnode->fid, name); + trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } /* - * Deliver reply data to any operation that returns file status and volume + * Deliver reply data to any operation that returns directory status and volume * sync. */ -static int afs_deliver_fs_status_and_vol(struct afs_call *call) +static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; - _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call); if (ret < 0) return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -871,14 +706,14 @@ static int afs_deliver_fs_status_and_vol(struct afs_call *call) static const struct afs_call_type afs_RXFSRemoveFile = { .name = "FS.RemoveFile", .op = afs_FS_RemoveFile, - .deliver = afs_deliver_fs_status_and_vol, + .deliver = afs_deliver_fs_dir_status_and_vol, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSRemoveDir = { .name = "FS.RemoveDir", .op = afs_FS_RemoveDir, - .deliver = afs_deliver_fs_status_and_vol, + .deliver = afs_deliver_fs_dir_status_and_vol, .destructor = afs_flat_call_destructor, }; @@ -886,7 +721,7 @@ static const struct afs_call_type afs_RXFSRemoveDir = { * remove a file or directory */ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, bool isdir, u64 current_data_version) + const char *name, bool isdir, struct afs_status_cb *dvnode_scb) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -895,7 +730,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_remove(fc, vnode, name, isdir, current_data_version); + return yfs_fs_remove(fc, vnode, name, isdir, dvnode_scb); _enter(""); @@ -910,9 +745,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, return -ENOMEM; call->key = fc->key; - call->reply[0] = dvnode; - call->reply[1] = vnode; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; /* marshall the parameters */ bp = call->request; @@ -930,6 +763,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -939,7 +773,6 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int afs_deliver_fs_link(struct afs_call *call) { - struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1]; const __be32 *bp; int ret; @@ -951,14 +784,13 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - ret = afs_decode_status(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -978,7 +810,9 @@ static const struct afs_call_type afs_RXFSLink = { * make a hard link */ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, u64 current_data_version) + const char *name, + struct afs_status_cb *dvnode_scb, + struct afs_status_cb *vnode_scb) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -987,7 +821,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_link(fc, vnode, name, current_data_version); + return yfs_fs_link(fc, vnode, name, dvnode_scb, vnode_scb); _enter(""); @@ -1000,9 +834,8 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, return -ENOMEM; call->key = fc->key; - call->reply[0] = dvnode; - call->reply[1] = vnode; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; + call->out_scb = vnode_scb; /* marshall the parameters */ bp = call->request; @@ -1023,6 +856,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1032,7 +866,6 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int afs_deliver_fs_symlink(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1044,15 +877,14 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->reply[1]); - ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + xdr_decode_AFSFid(&bp, call->out_fid); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1074,19 +906,19 @@ static const struct afs_call_type afs_RXFSSymlink = { int afs_fs_symlink(struct afs_fs_cursor *fc, const char *name, const char *contents, - u64 current_data_version, + struct afs_status_cb *dvnode_scb, struct afs_fid *newfid, - struct afs_file_status *newstatus) + struct afs_status_cb *new_scb) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, padsz, c_namesz, c_padsz; __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_symlink(fc, name, contents, current_data_version, - newfid, newstatus); + return yfs_fs_symlink(fc, name, contents, dvnode_scb, + newfid, new_scb); _enter(""); @@ -1104,17 +936,16 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = newfid; - call->reply[2] = newstatus; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; + call->out_fid = newfid; + call->out_scb = new_scb; /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSYMLINK); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(dvnode->fid.vid); + *bp++ = htonl(dvnode->fid.vnode); + *bp++ = htonl(dvnode->fid.unique); *bp++ = htonl(namesz); memcpy(bp, name, namesz); bp = (void *) bp + namesz; @@ -1130,14 +961,15 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, bp = (void *) bp + c_padsz; } *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); - *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ + *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &vnode->fid, name); + trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1147,29 +979,24 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_rename(struct afs_call *call) { - struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1]; const __be32 *bp; int ret; - _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call); if (ret < 0) return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (new_dvnode != orig_dvnode) { - ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, - &call->expected_version_2, NULL); + if (call->out_dir_scb != call->out_scb) { + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; } - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1186,14 +1013,14 @@ static const struct afs_call_type afs_RXFSRename = { }; /* - * create a symbolic link + * Rename/move a file or directory. */ int afs_fs_rename(struct afs_fs_cursor *fc, const char *orig_name, struct afs_vnode *new_dvnode, const char *new_name, - u64 current_orig_data_version, - u64 current_new_data_version) + struct afs_status_cb *orig_dvnode_scb, + struct afs_status_cb *new_dvnode_scb) { struct afs_vnode *orig_dvnode = fc->vnode; struct afs_call *call; @@ -1204,8 +1031,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc, if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) return yfs_fs_rename(fc, orig_name, new_dvnode, new_name, - current_orig_data_version, - current_new_data_version); + orig_dvnode_scb, + new_dvnode_scb); _enter(""); @@ -1225,10 +1052,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->reply[0] = orig_dvnode; - call->reply[1] = new_dvnode; - call->expected_version = current_orig_data_version + 1; - call->expected_version_2 = current_new_data_version + 1; + call->out_dir_scb = orig_dvnode_scb; + call->out_scb = new_dvnode_scb; /* marshall the parameters */ bp = call->request; @@ -1257,6 +1082,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1266,7 +1092,6 @@ int afs_fs_rename(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_store_data(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1278,13 +1103,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ - - afs_pages_written_back(vnode, call); + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1314,7 +1136,8 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, struct address_space *mapping, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, - loff_t size, loff_t pos, loff_t i_size) + loff_t size, loff_t pos, loff_t i_size, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1332,13 +1155,12 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, call->key = fc->key; call->mapping = mapping; - call->reply[0] = vnode; call->first = first; call->last = last; call->first_offset = offset; call->last_to = to; call->send_pages = true; - call->expected_version = vnode->status.data_version + 1; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1362,6 +1184,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, *bp++ = htonl((u32) i_size); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1371,7 +1194,8 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, */ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) + unsigned offset, unsigned to, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1380,7 +1204,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_store_data(fc, mapping, first, last, offset, to); + return yfs_fs_store_data(fc, mapping, first, last, offset, to, scb); _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); @@ -1401,7 +1225,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) return afs_fs_store_data64(fc, mapping, first, last, offset, to, - size, pos, i_size); + size, pos, i_size, scb); call = afs_alloc_flat_call(net, &afs_RXFSStoreData, (4 + 6 + 3) * 4, @@ -1411,13 +1235,12 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, call->key = fc->key; call->mapping = mapping; - call->reply[0] = vnode; call->first = first; call->last = last; call->first_offset = offset; call->last_to = to; call->send_pages = true; - call->expected_version = vnode->status.data_version + 1; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1439,6 +1262,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1448,7 +1272,6 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, */ static int afs_deliver_fs_store_status(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1460,11 +1283,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1498,7 +1320,8 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = { * set the attributes on a very large file, using FS.StoreData rather than * FS.StoreStatus so as to alter the file size also */ -static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) +static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1517,8 +1340,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->expected_version = vnode->status.data_version + 1; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1538,6 +1360,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1546,7 +1369,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus * so as to alter the file size also */ -static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) +static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1558,7 +1382,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) ASSERT(attr->ia_valid & ATTR_SIZE); if (attr->ia_size >> 32) - return afs_fs_setattr_size64(fc, attr); + return afs_fs_setattr_size64(fc, attr, scb); call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status, (4 + 6 + 3) * 4, @@ -1567,8 +1391,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->expected_version = vnode->status.data_version + 1; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1585,6 +1408,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1593,7 +1417,8 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) * set the attributes on a file, using FS.StoreData if there's a change in file * size, and FS.StoreStatus otherwise */ -int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) +int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1601,10 +1426,10 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_setattr(fc, attr); + return yfs_fs_setattr(fc, attr, scb); if (attr->ia_valid & ATTR_SIZE) - return afs_fs_setattr_size(fc, attr); + return afs_fs_setattr_size(fc, attr, scb); _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); @@ -1616,8 +1441,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->expected_version = vnode->status.data_version; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1630,6 +1454,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1659,7 +1484,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]); + xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus); call->unmarshall++; afs_extract_to_tmp(call); @@ -1675,7 +1500,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) return afs_protocol_error(call, -EBADMSG, afs_eproto_volname_len); size = (call->count + 3) & ~3; /* It's padded */ - afs_extract_begin(call, call->reply[2], size); + afs_extract_to_buf(call, size); call->unmarshall++; /* Fall through - and extract the volume name */ @@ -1685,7 +1510,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) if (ret < 0) return ret; - p = call->reply[2]; + p = call->buffer; p[call->count] = 0; _debug("volname '%s'", p); afs_extract_to_tmp(call); @@ -1703,7 +1528,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) return afs_protocol_error(call, -EBADMSG, afs_eproto_offline_msg_len); size = (call->count + 3) & ~3; /* It's padded */ - afs_extract_begin(call, call->reply[2], size); + afs_extract_to_buf(call, size); call->unmarshall++; /* Fall through - and extract the offline message */ @@ -1713,7 +1538,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) if (ret < 0) return ret; - p = call->reply[2]; + p = call->buffer; p[call->count] = 0; _debug("offline '%s'", p); @@ -1732,7 +1557,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) return afs_protocol_error(call, -EBADMSG, afs_eproto_motd_len); size = (call->count + 3) & ~3; /* It's padded */ - afs_extract_begin(call, call->reply[2], size); + afs_extract_to_buf(call, size); call->unmarshall++; /* Fall through - and extract the message of the day */ @@ -1742,7 +1567,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) if (ret < 0) return ret; - p = call->reply[2]; + p = call->buffer; p[call->count] = 0; _debug("motd '%s'", p); @@ -1757,23 +1582,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) } /* - * destroy an FS.GetVolumeStatus call - */ -static void afs_get_volume_status_call_destructor(struct afs_call *call) -{ - kfree(call->reply[2]); - call->reply[2] = NULL; - afs_flat_call_destructor(call); -} - -/* * FS.GetVolumeStatus operation type */ static const struct afs_call_type afs_RXFSGetVolumeStatus = { .name = "FS.GetVolumeStatus", .op = afs_FS_GetVolumeStatus, .deliver = afs_deliver_fs_get_volume_status, - .destructor = afs_get_volume_status_call_destructor, + .destructor = afs_flat_call_destructor, }; /* @@ -1786,27 +1601,19 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; - void *tmpbuf; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) return yfs_fs_get_volume_status(fc, vs); _enter(""); - tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL); - if (!tmpbuf) - return -ENOMEM; - - call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4); - if (!call) { - kfree(tmpbuf); + call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, + max(12 * 4, AFSOPAQUEMAX + 1)); + if (!call) return -ENOMEM; - } call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = vs; - call->reply[2] = tmpbuf; + call->out_volstatus = vs; /* marshall the parameters */ bp = call->request; @@ -1815,6 +1622,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1835,7 +1643,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1876,7 +1684,8 @@ static const struct afs_call_type afs_RXFSReleaseLock = { /* * Set a lock on a file */ -int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) +int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1884,7 +1693,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_set_lock(fc, type); + return yfs_fs_set_lock(fc, type, scb); _enter(""); @@ -1893,8 +1702,8 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->want_reply_time = true; + call->lvnode = vnode; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1906,6 +1715,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_calli(call, &vnode->fid, type); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1913,7 +1723,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) /* * extend a lock on a file */ -int afs_fs_extend_lock(struct afs_fs_cursor *fc) +int afs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1921,7 +1731,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_extend_lock(fc); + return yfs_fs_extend_lock(fc, scb); _enter(""); @@ -1930,8 +1740,8 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->want_reply_time = true; + call->lvnode = vnode; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1942,6 +1752,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1949,7 +1760,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) /* * release a lock on a file */ -int afs_fs_release_lock(struct afs_fs_cursor *fc) +int afs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1957,7 +1768,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc) __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_release_lock(fc); + return yfs_fs_release_lock(fc, scb); _enter(""); @@ -1966,7 +1777,8 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; + call->lvnode = vnode; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1977,6 +1789,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2071,14 +1884,6 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) return 0; } -static void afs_destroy_fs_get_capabilities(struct afs_call *call) -{ - struct afs_server *server = call->reply[0]; - - afs_put_server(call->net, server); - afs_flat_call_destructor(call); -} - /* * FS.GetCapabilities operation type */ @@ -2087,7 +1892,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { .op = afs_FS_GetCapabilities, .deliver = afs_deliver_fs_get_capabilities, .done = afs_fileserver_probe_result, - .destructor = afs_destroy_fs_get_capabilities, + .destructor = afs_flat_call_destructor, }; /* @@ -2110,11 +1915,11 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, return ERR_PTR(-ENOMEM); call->key = key; - call->reply[0] = afs_get_server(server); - call->reply[1] = (void *)(long)server_index; + call->server = afs_get_server(server); + call->server_index = server_index; call->upgrade = true; - call->want_reply_time = true; call->async = true; + call->max_lifespan = AFS_PROBE_MAX_LIFESPAN; /* marshall the parameters */ bp = call->request; @@ -2131,10 +1936,6 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, */ static int afs_deliver_fs_fetch_status(struct afs_call *call) { - struct afs_file_status *status = call->reply[1]; - struct afs_callback *callback = call->reply[2]; - struct afs_volsync *volsync = call->reply[3]; - struct afs_fid *fid = call->reply[0]; const __be32 *bp; int ret; @@ -2142,16 +1943,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) if (ret < 0) return ret; - _enter("{%llx:%llu}", fid->vid, fid->vnode); - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, status, NULL, - &call->expected_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - xdr_decode_AFSCallBack_raw(call, &bp, callback); - xdr_decode_AFSVolSync(&bp, volsync); + xdr_decode_AFSCallBack(&bp, call, call->out_scb); + xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -2173,15 +1971,14 @@ static const struct afs_call_type afs_RXFSFetchStatus = { int afs_fs_fetch_status(struct afs_fs_cursor *fc, struct afs_net *net, struct afs_fid *fid, - struct afs_file_status *status, - struct afs_callback *callback, + struct afs_status_cb *scb, struct afs_volsync *volsync) { struct afs_call *call; __be32 *bp; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync); + return yfs_fs_fetch_status(fc, net, fid, scb, volsync); _enter(",%x,{%llx:%llu},,", key_serial(fc->key), fid->vid, fid->vnode); @@ -2193,12 +1990,9 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, } call->key = fc->key; - call->reply[0] = fid; - call->reply[1] = status; - call->reply[2] = callback; - call->reply[3] = volsync; - call->expected_version = 1; /* vnode->status.data_version */ - call->want_reply_time = true; + call->out_fid = fid; + call->out_scb = scb; + call->out_volsync = volsync; /* marshall the parameters */ bp = call->request; @@ -2207,9 +2001,9 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, bp[2] = htonl(fid->vnode); bp[3] = htonl(fid->unique); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2219,9 +2013,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) { - struct afs_file_status *statuses; - struct afs_callback *callbacks; - struct afs_vnode *vnode = call->reply[0]; + struct afs_status_cb *scb; const __be32 *bp; u32 tmp; int ret; @@ -2260,10 +2052,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - statuses = call->reply[1]; - ret = afs_decode_status(call, &bp, &statuses[call->count], - call->count == 0 ? vnode : NULL, - NULL, NULL); + scb = &call->out_scb[call->count]; + ret = xdr_decode_AFSFetchStatus(&bp, call, scb); if (ret < 0) return ret; @@ -2302,13 +2092,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) _debug("unmarshall CB array"); bp = call->buffer; - callbacks = call->reply[2]; - callbacks[call->count].version = ntohl(bp[0]); - callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1])); - callbacks[call->count].type = ntohl(bp[2]); - statuses = call->reply[1]; - if (call->count == 0 && vnode && statuses[0].abort_code == 0) - xdr_decode_AFSCallBack(call, vnode, &bp); + scb = &call->out_scb[call->count]; + xdr_decode_AFSCallBack(&bp, call, scb); call->count++; if (call->count < call->count2) goto more_cbs; @@ -2323,7 +2108,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSVolSync(&bp, call->reply[3]); + xdr_decode_AFSVolSync(&bp, call->out_volsync); call->unmarshall++; @@ -2351,8 +2136,7 @@ static const struct afs_call_type afs_RXFSInlineBulkStatus = { int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, struct afs_net *net, struct afs_fid *fids, - struct afs_file_status *statuses, - struct afs_callback *callbacks, + struct afs_status_cb *statuses, unsigned int nr_fids, struct afs_volsync *volsync) { @@ -2361,7 +2145,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, int i; if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks, + return yfs_fs_inline_bulk_status(fc, net, fids, statuses, nr_fids, volsync); _enter(",%x,{%llx:%llu},%u", @@ -2376,12 +2160,9 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, } call->key = fc->key; - call->reply[0] = NULL; /* vnode for fid[0] */ - call->reply[1] = statuses; - call->reply[2] = callbacks; - call->reply[3] = volsync; + call->out_scb = statuses; + call->out_volsync = volsync; call->count2 = nr_fids; - call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -2393,9 +2174,9 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, *bp++ = htonl(fids[i].unique); } - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &fids[0]); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2405,7 +2186,6 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_fetch_acl(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[1]; struct afs_acl *acl; const __be32 *bp; unsigned int size; @@ -2430,7 +2210,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL); if (!acl) return -ENOMEM; - call->reply[0] = acl; + call->ret_acl = acl; acl->size = call->count2; afs_extract_begin(call, acl->data, size); call->unmarshall++; @@ -2451,11 +2231,10 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) return ret; bp = call->buffer; - ret = afs_decode_status(call, &bp, &vnode->status, vnode, - &vnode->status.data_version, NULL); + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - xdr_decode_AFSVolSync(&bp, call->reply[2]); + xdr_decode_AFSVolSync(&bp, call->out_volsync); call->unmarshall++; @@ -2469,7 +2248,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) static void afs_destroy_fs_fetch_acl(struct afs_call *call) { - kfree(call->reply[0]); + kfree(call->ret_acl); afs_flat_call_destructor(call); } @@ -2486,7 +2265,8 @@ static const struct afs_call_type afs_RXFSFetchACL = { /* * Fetch the ACL for a file. */ -struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc) +struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -2503,10 +2283,9 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc) } call->key = fc->key; - call->reply[0] = NULL; - call->reply[1] = vnode; - call->reply[2] = NULL; /* volsync */ - call->ret_reply0 = true; + call->ret_acl = NULL; + call->out_scb = scb; + call->out_volsync = NULL; /* marshall the parameters */ bp = call->request; @@ -2515,7 +2294,6 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc) bp[2] = htonl(vnode->fid.vnode); bp[3] = htonl(vnode->fid.unique); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); afs_make_call(&fc->ac, call, GFP_KERNEL); @@ -2523,19 +2301,43 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc) } /* + * Deliver reply data to any operation that returns file status and volume + * sync. + */ +static int afs_deliver_fs_file_status_and_vol(struct afs_call *call) +{ + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; + xdr_decode_AFSVolSync(&bp, call->out_volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* * FS.StoreACL operation type */ static const struct afs_call_type afs_RXFSStoreACL = { .name = "FS.StoreACL", .op = afs_FS_StoreACL, - .deliver = afs_deliver_fs_status_and_vol, + .deliver = afs_deliver_fs_file_status_and_vol, .destructor = afs_flat_call_destructor, }; /* * Fetch the ACL for a file. */ -int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl) +int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -2555,8 +2357,8 @@ int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl) } call->key = fc->key; - call->reply[0] = vnode; - call->reply[2] = NULL; /* volsync */ + call->out_scb = scb; + call->out_volsync = NULL; /* marshall the parameters */ bp = call->request; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c4652b42d545..b42d9d09669c 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -23,6 +23,7 @@ #include <linux/namei.h> #include <linux/iversion.h> #include "internal.h" +#include "afs_fs.h" static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, @@ -58,38 +59,50 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren * Initialise an inode from the vnode status. */ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, - struct afs_vnode *parent_vnode) + struct afs_cb_interest *cbi, + struct afs_vnode *parent_vnode, + struct afs_status_cb *scb) { + struct afs_cb_interest *old_cbi = NULL; + struct afs_file_status *status = &scb->status; struct inode *inode = AFS_VNODE_TO_I(vnode); + struct timespec64 t; _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", - vnode->status.type, - vnode->status.nlink, - (unsigned long long) vnode->status.size, - vnode->status.data_version, - vnode->status.mode); + status->type, + status->nlink, + (unsigned long long) status->size, + status->data_version, + status->mode); - read_seqlock_excl(&vnode->cb_lock); + write_seqlock(&vnode->cb_lock); - afs_update_inode_from_status(vnode, &vnode->status, NULL, - AFS_VNODE_NOT_YET_SET); + vnode->status = *status; - switch (vnode->status.type) { + t = status->mtime_client; + inode->i_ctime = t; + inode->i_mtime = t; + inode->i_atime = t; + inode->i_uid = make_kuid(&init_user_ns, status->owner); + inode->i_gid = make_kgid(&init_user_ns, status->group); + set_nlink(&vnode->vfs_inode, status->nlink); + + switch (status->type) { case AFS_FTYPE_FILE: - inode->i_mode = S_IFREG | vnode->status.mode; + inode->i_mode = S_IFREG | status->mode; inode->i_op = &afs_file_inode_operations; inode->i_fop = &afs_file_operations; inode->i_mapping->a_ops = &afs_fs_aops; break; case AFS_FTYPE_DIR: - inode->i_mode = S_IFDIR | vnode->status.mode; + inode->i_mode = S_IFDIR | status->mode; inode->i_op = &afs_dir_inode_operations; inode->i_fop = &afs_dir_file_operations; inode->i_mapping->a_ops = &afs_dir_aops; break; case AFS_FTYPE_SYMLINK: /* Symlinks with a mode of 0644 are actually mountpoints. */ - if ((vnode->status.mode & 0777) == 0644) { + if ((status->mode & 0777) == 0644) { inode->i_flags |= S_AUTOMOUNT; set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); @@ -99,7 +112,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, inode->i_fop = &afs_mntpt_file_operations; inode->i_mapping->a_ops = &afs_fs_aops; } else { - inode->i_mode = S_IFLNK | vnode->status.mode; + inode->i_mode = S_IFLNK | status->mode; inode->i_op = &afs_symlink_inode_operations; inode->i_mapping->a_ops = &afs_fs_aops; } @@ -107,7 +120,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, break; default: dump_vnode(vnode, parent_vnode); - read_sequnlock_excl(&vnode->cb_lock); + write_sequnlock(&vnode->cb_lock); return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } @@ -116,17 +129,175 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, * for consistency with other AFS clients. */ inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1; - vnode->invalid_before = vnode->status.data_version; + i_size_write(&vnode->vfs_inode, status->size); + + vnode->invalid_before = status->data_version; + inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); + + if (!scb->have_cb) { + /* it's a symlink we just created (the fileserver + * didn't give us a callback) */ + vnode->cb_expires_at = ktime_get_real_seconds(); + } else { + vnode->cb_expires_at = scb->callback.expires_at; + old_cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + if (cbi != old_cbi) + rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(cbi)); + else + old_cbi = NULL; + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } - read_sequnlock_excl(&vnode->cb_lock); + write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), old_cbi); return 0; } /* + * Update the core inode struct from a returned status record. + */ +static void afs_apply_status(struct afs_fs_cursor *fc, + struct afs_vnode *vnode, + struct afs_status_cb *scb, + const afs_dataversion_t *expected_version) +{ + struct afs_file_status *status = &scb->status; + struct timespec64 t; + umode_t mode; + bool data_changed = false; + + BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags)); + + if (status->type != vnode->status.type) { + pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + status->type, vnode->status.type); + afs_protocol_error(NULL, -EBADMSG, afs_eproto_bad_status); + return; + } + + if (status->nlink != vnode->status.nlink) + set_nlink(&vnode->vfs_inode, status->nlink); + + if (status->owner != vnode->status.owner) + vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner); + + if (status->group != vnode->status.group) + vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group); + + if (status->mode != vnode->status.mode) { + mode = vnode->vfs_inode.i_mode; + mode &= ~S_IALLUGO; + mode |= status->mode; + WRITE_ONCE(vnode->vfs_inode.i_mode, mode); + } + + t = status->mtime_client; + vnode->vfs_inode.i_ctime = t; + vnode->vfs_inode.i_mtime = t; + vnode->vfs_inode.i_atime = t; + + if (vnode->status.data_version != status->data_version) + data_changed = true; + + vnode->status = *status; + + if (expected_version && + *expected_version != status->data_version) { + kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", + (unsigned long long) status->data_version, + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long) *expected_version, + fc->type ? fc->type->name : "???"); + vnode->invalid_before = status->data_version; + if (vnode->status.type == AFS_FTYPE_DIR) { + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + afs_stat_v(vnode, n_inval); + } else { + set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); + } + } else if (vnode->status.type == AFS_FTYPE_DIR) { + /* Expected directory change is handled elsewhere so + * that we can locally edit the directory and save on a + * download. + */ + if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + data_changed = false; + } + + if (data_changed) { + inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); + i_size_write(&vnode->vfs_inode, status->size); + } +} + +/* + * Apply a callback to a vnode. + */ +static void afs_apply_callback(struct afs_fs_cursor *fc, + struct afs_vnode *vnode, + struct afs_status_cb *scb, + unsigned int cb_break) +{ + struct afs_cb_interest *old; + struct afs_callback *cb = &scb->callback; + + if (!afs_cb_is_broken(cb_break, vnode, fc->cbi)) { + vnode->cb_expires_at = cb->expires_at; + old = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + if (old != fc->cbi) { + rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(fc->cbi)); + afs_put_cb_interest(afs_v2net(vnode), old); + } + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } +} + +/* + * Apply the received status and callback to an inode all in the same critical + * section to avoid races with afs_validate(). + */ +void afs_vnode_commit_status(struct afs_fs_cursor *fc, + struct afs_vnode *vnode, + unsigned int cb_break, + const afs_dataversion_t *expected_version, + struct afs_status_cb *scb) +{ + if (fc->ac.error != 0) + return; + + write_seqlock(&vnode->cb_lock); + + if (scb->have_error) { + if (scb->status.abort_code == VNOVNODE) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_nlink(&vnode->vfs_inode); + __afs_break_callback(vnode); + } + } else { + if (scb->have_status) + afs_apply_status(fc, vnode, scb, expected_version); + if (scb->have_cb) + afs_apply_callback(fc, vnode, scb, cb_break); + } + + write_sequnlock(&vnode->cb_lock); + + if (fc->ac.error == 0 && scb->have_status) + afs_cache_permit(vnode, fc->key, cb_break, scb); +} + +/* * Fetch file status from the volume. */ -int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) +int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new, + afs_access_t *_caller_access) { + struct afs_status_cb *scb; struct afs_fs_cursor fc; int ret; @@ -135,18 +306,38 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, vnode->flags); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + return -ENOMEM; + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { + afs_dataversion_t data_version = vnode->status.data_version; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_fetch_file_status(&fc, NULL, new_inode); + afs_fs_fetch_file_status(&fc, scb, NULL); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + if (fc.error) { + /* Do nothing. */ + } else if (is_new) { + ret = afs_inode_init_from_status(vnode, key, fc.cbi, + NULL, scb); + fc.error = ret; + if (ret == 0) + afs_cache_permit(vnode, key, fc.cb_break, scb); + } else { + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); + } + afs_check_for_remote_deletion(&fc, vnode); ret = afs_end_vnode_operation(&fc); } + if (ret == 0 && _caller_access) + *_caller_access = scb->status.caller_access; + kfree(scb); _leave(" = %d", ret); return ret; } @@ -156,10 +347,10 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) */ int afs_iget5_test(struct inode *inode, void *opaque) { - struct afs_iget_data *data = opaque; + struct afs_iget_data *iget_data = opaque; struct afs_vnode *vnode = AFS_FS_I(inode); - return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0; + return memcmp(&vnode->fid, &iget_data->fid, sizeof(iget_data->fid)) == 0; } /* @@ -177,17 +368,19 @@ static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque) */ static int afs_iget5_set(struct inode *inode, void *opaque) { - struct afs_iget_data *data = opaque; + struct afs_iget_data *iget_data = opaque; struct afs_vnode *vnode = AFS_FS_I(inode); - vnode->fid = data->fid; - vnode->volume = data->volume; + vnode->fid = iget_data->fid; + vnode->volume = iget_data->volume; + vnode->cb_v_break = iget_data->cb_v_break; + vnode->cb_s_break = iget_data->cb_s_break; /* YFS supports 96-bit vnode IDs, but Linux only supports * 64-bit inode numbers. */ - inode->i_ino = data->fid.vnode; - inode->i_generation = data->fid.unique; + inode->i_ino = iget_data->fid.vnode; + inode->i_generation = iget_data->fid.unique; return 0; } @@ -197,38 +390,42 @@ static int afs_iget5_set(struct inode *inode, void *opaque) */ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) { - struct afs_iget_data data; struct afs_super_info *as; struct afs_vnode *vnode; struct inode *inode; static atomic_t afs_autocell_ino; + struct afs_iget_data iget_data = { + .cb_v_break = 0, + .cb_s_break = 0, + }; + _enter(""); as = sb->s_fs_info; if (as->volume) { - data.volume = as->volume; - data.fid.vid = as->volume->vid; + iget_data.volume = as->volume; + iget_data.fid.vid = as->volume->vid; } if (root) { - data.fid.vnode = 1; - data.fid.unique = 1; + iget_data.fid.vnode = 1; + iget_data.fid.unique = 1; } else { - data.fid.vnode = atomic_inc_return(&afs_autocell_ino); - data.fid.unique = 0; + iget_data.fid.vnode = atomic_inc_return(&afs_autocell_ino); + iget_data.fid.unique = 0; } - inode = iget5_locked(sb, data.fid.vnode, + inode = iget5_locked(sb, iget_data.fid.vnode, afs_iget5_pseudo_dir_test, afs_iget5_set, - &data); + &iget_data); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", - inode, inode->i_ino, data.fid.vid, data.fid.vnode, - data.fid.unique); + inode, inode->i_ino, iget_data.fid.vid, iget_data.fid.vnode, + iget_data.fid.unique); vnode = AFS_FS_I(inode); @@ -299,23 +496,24 @@ static void afs_get_inode_cache(struct afs_vnode *vnode) * inode retrieval */ struct inode *afs_iget(struct super_block *sb, struct key *key, - struct afs_fid *fid, struct afs_file_status *status, - struct afs_callback *cb, struct afs_cb_interest *cbi, + struct afs_iget_data *iget_data, + struct afs_status_cb *scb, + struct afs_cb_interest *cbi, struct afs_vnode *parent_vnode) { - struct afs_iget_data data = { .fid = *fid }; struct afs_super_info *as; struct afs_vnode *vnode; + struct afs_fid *fid = &iget_data->fid; struct inode *inode; int ret; _enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique); as = sb->s_fs_info; - data.volume = as->volume; + iget_data->volume = as->volume; inode = iget5_locked(sb, fid->vnode, afs_iget5_test, afs_iget5_set, - &data); + iget_data); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); @@ -332,43 +530,25 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, return inode; } - if (!status) { + if (!scb) { /* it's a remotely extant inode */ - ret = afs_fetch_status(vnode, key, true); + ret = afs_fetch_status(vnode, key, true, NULL); if (ret < 0) goto bad_inode; } else { - /* it's an inode we just created */ - memcpy(&vnode->status, status, sizeof(vnode->status)); - - if (!cb) { - /* it's a symlink we just created (the fileserver - * didn't give us a callback) */ - vnode->cb_version = 0; - vnode->cb_type = 0; - vnode->cb_expires_at = ktime_get(); - } else { - vnode->cb_version = cb->version; - vnode->cb_type = cb->type; - vnode->cb_expires_at = cb->expires_at; - vnode->cb_interest = afs_get_cb_interest(cbi); - set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - } - - vnode->cb_expires_at += ktime_get_real_seconds(); + ret = afs_inode_init_from_status(vnode, key, cbi, parent_vnode, + scb); + if (ret < 0) + goto bad_inode; } - ret = afs_inode_init_from_status(vnode, key, parent_vnode); - if (ret < 0) - goto bad_inode; - afs_get_inode_cache(vnode); /* success */ clear_bit(AFS_VNODE_UNSET, &vnode->flags); inode->i_flags |= S_NOATIME; unlock_new_inode(inode); - _leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type); + _leave(" = %p", inode); return inode; /* failure */ @@ -400,6 +580,66 @@ void afs_zap_data(struct afs_vnode *vnode) } /* + * Check the validity of a vnode/inode. + */ +bool afs_check_validity(struct afs_vnode *vnode) +{ + struct afs_cb_interest *cbi; + struct afs_server *server; + struct afs_volume *volume = vnode->volume; + time64_t now = ktime_get_real_seconds(); + bool valid, need_clear = false; + unsigned int cb_break, cb_s_break, cb_v_break; + int seq = 0; + + do { + read_seqbegin_or_lock(&vnode->cb_lock, &seq); + cb_v_break = READ_ONCE(volume->cb_v_break); + cb_break = vnode->cb_break; + + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + cbi = rcu_dereference(vnode->cb_interest); + server = rcu_dereference(cbi->server); + cb_s_break = READ_ONCE(server->cb_s_break); + + if (vnode->cb_s_break != cb_s_break || + vnode->cb_v_break != cb_v_break) { + vnode->cb_s_break = cb_s_break; + vnode->cb_v_break = cb_v_break; + need_clear = true; + valid = false; + } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { + need_clear = true; + valid = false; + } else if (vnode->cb_expires_at - 10 <= now) { + need_clear = true; + valid = false; + } else { + valid = true; + } + } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + valid = true; + } else { + vnode->cb_v_break = cb_v_break; + valid = false; + } + + } while (need_seqretry(&vnode->cb_lock, seq)); + + done_seqretry(&vnode->cb_lock, seq); + + if (need_clear) { + write_seqlock(&vnode->cb_lock); + if (cb_break == vnode->cb_break) + __afs_break_callback(vnode); + write_sequnlock(&vnode->cb_lock); + valid = false; + } + + return valid; +} + +/* * validate a vnode/inode * - there are several things we need to check * - parent dir data changes (rm, rmdir, rename, mkdir, create, link, @@ -410,7 +650,6 @@ void afs_zap_data(struct afs_vnode *vnode) */ int afs_validate(struct afs_vnode *vnode, struct key *key) { - time64_t now = ktime_get_real_seconds(); bool valid; int ret; @@ -418,36 +657,9 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); - /* Quickly check the callback state. Ideally, we'd use read_seqbegin - * here, but we have no way to pass the net namespace to the RCU - * cleanup for the server record. - */ - read_seqlock_excl(&vnode->cb_lock); - - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break || - vnode->cb_v_break != vnode->volume->cb_v_break) { - vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; - vnode->cb_v_break = vnode->volume->cb_v_break; - valid = false; - } else if (vnode->status.type == AFS_FTYPE_DIR && - (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) || - vnode->cb_expires_at - 10 <= now)) { - valid = false; - } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) || - vnode->cb_expires_at - 10 <= now) { - valid = false; - } else { - valid = true; - } - } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - valid = true; - } else { - vnode->cb_v_break = vnode->volume->cb_v_break; - valid = false; - } - - read_sequnlock_excl(&vnode->cb_lock); + rcu_read_lock(); + valid = afs_check_validity(vnode); + rcu_read_unlock(); if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) clear_nlink(&vnode->vfs_inode); @@ -463,7 +675,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * access */ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _debug("not promised"); - ret = afs_fetch_status(vnode, key, false); + ret = afs_fetch_status(vnode, key, false, NULL); if (ret < 0) { if (ret == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -534,6 +746,7 @@ int afs_drop_inode(struct inode *inode) */ void afs_evict_inode(struct inode *inode) { + struct afs_cb_interest *cbi; struct afs_vnode *vnode; vnode = AFS_FS_I(inode); @@ -550,10 +763,14 @@ void afs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - if (vnode->cb_interest) { - afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest); - vnode->cb_interest = NULL; + write_seqlock(&vnode->cb_lock); + cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->cb_lock.lock)); + if (cbi) { + afs_put_cb_interest(afs_i2net(inode), cbi); + rcu_assign_pointer(vnode->cb_interest, NULL); } + write_sequnlock(&vnode->cb_lock); while (!list_empty(&vnode->wb_keys)) { struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next, @@ -573,6 +790,7 @@ void afs_evict_inode(struct inode *inode) } #endif + afs_prune_wb_keys(vnode); afs_put_permits(rcu_access_pointer(vnode->permit_cache)); key_put(vnode->silly_key); vnode->silly_key = NULL; @@ -587,9 +805,10 @@ void afs_evict_inode(struct inode *inode) int afs_setattr(struct dentry *dentry, struct iattr *attr) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; - int ret; + int ret = -ENOMEM; _enter("{%llx:%llu},{n=%pd},%x", vnode->fid.vid, vnode->fid.vnode, dentry, @@ -601,6 +820,10 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) return 0; } + scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); + if (!scb) + goto error; + /* flush any dirty data outstanding on a regular file */ if (S_ISREG(vnode->vfs_inode.i_mode)) filemap_write_and_wait(vnode->vfs_inode.i_mapping); @@ -611,25 +834,33 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); - goto error; + goto error_scb; } } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, false)) { + afs_dataversion_t data_version = vnode->status.data_version; + + if (attr->ia_valid & ATTR_SIZE) + data_version++; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_setattr(&fc, attr); + afs_fs_setattr(&fc, attr, scb); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_check_for_remote_deletion(&fc, vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); ret = afs_end_vnode_operation(&fc); } if (!(attr->ia_valid & ATTR_FILE)) key_put(key); +error_scb: + kfree(scb); error: _leave(" = %d", ret); return ret; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b3cd6e8ad59d..2073c1a3ab4b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -66,6 +66,8 @@ struct afs_fs_context { struct afs_iget_data { struct afs_fid fid; struct afs_volume *volume; /* volume on which resides */ + unsigned int cb_v_break; /* Pre-fetch volume break count */ + unsigned int cb_s_break; /* Pre-fetch server break count */ }; enum afs_call_state { @@ -111,8 +113,12 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - struct afs_server *cm_server; /* Server affected by incoming CM call */ + union { + struct afs_server *server; + struct afs_vlserver *vlserver; + }; struct afs_cb_interest *cbi; /* Callback interest for server used */ + struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ struct address_space *mapping; /* Pages being written from */ struct iov_iter iter; /* Buffer iterator */ @@ -122,7 +128,20 @@ struct afs_call { struct bio_vec bvec[1]; }; void *buffer; /* reply receive buffer */ - void *reply[4]; /* Where to put the reply */ + union { + long ret0; /* Value to reply with instead of 0 */ + struct afs_addr_list *ret_alist; + struct afs_vldb_entry *ret_vldb; + struct afs_acl *ret_acl; + }; + struct afs_fid *out_fid; + struct afs_status_cb *out_dir_scb; + struct afs_status_cb *out_scb; + struct yfs_acl *out_yacl; + struct afs_volsync *out_volsync; + struct afs_volume_status *out_volstatus; + struct afs_read *read_request; + unsigned int server_index; pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ atomic_t usage; @@ -131,10 +150,10 @@ struct afs_call { int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ u32 epoch; + unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ - unsigned int cb_break; /* cb_break + cb_s_break before the call */ union { unsigned last_to; /* amount of mapping[last] */ unsigned count2; /* count used in unmarshalling */ @@ -145,9 +164,9 @@ struct afs_call { bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ - bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ - bool want_reply_time; /* T if want reply_time */ + bool have_reply_time; /* T if have got reply_time */ + bool intr; /* T if interruptible */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ @@ -159,8 +178,6 @@ struct afs_call { } __attribute__((packed)); __be64 tmp64; }; - afs_dataversion_t expected_version; /* Updated version expected from store */ - afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */ ktime_t reply_time; /* Time of first reply packet */ }; @@ -221,7 +238,8 @@ struct afs_read { unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; unsigned int offset; /* offset into current page */ - void (*page_done)(struct afs_call *, struct afs_read *); + struct afs_vnode *vnode; + void (*page_done)(struct afs_read *); struct page **pages; struct page *array[]; }; @@ -367,13 +385,13 @@ struct afs_cell { time64_t last_inactive; /* Time of last drop of usage count */ atomic_t usage; unsigned long flags; -#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */ -#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */ -#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */ -#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */ -#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */ +#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */ +#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */ enum afs_cell_state state; short error; + enum dns_record_source dns_source:8; /* Latest source of data from lookup */ + enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */ + unsigned int dns_lookup_count; /* Counter of DNS lookups */ /* Active fileserver interaction state. */ struct list_head proc_volumes; /* procfs volume list */ @@ -538,7 +556,10 @@ struct afs_server { struct afs_vol_interest { struct hlist_node srv_link; /* Link in server->cb_volumes */ struct hlist_head cb_interests; /* List of callback interests on the server */ - afs_volid_t vid; /* Volume ID to match */ + union { + struct rcu_head rcu; + afs_volid_t vid; /* Volume ID to match */ + }; unsigned int usage; }; @@ -550,7 +571,10 @@ struct afs_cb_interest { struct afs_vol_interest *vol_interest; struct afs_server *server; /* Server on which this interest resides */ struct super_block *sb; /* Superblock on which inodes reside */ - afs_volid_t vid; /* Volume ID to match */ + union { + struct rcu_head rcu; + afs_volid_t vid; /* Volume ID to match */ + }; refcount_t usage; }; @@ -660,15 +684,13 @@ struct afs_vnode { afs_lock_type_t lock_type : 8; /* outstanding callback notification on this file */ - struct afs_cb_interest *cb_interest; /* Server on which this resides */ + struct afs_cb_interest __rcu *cb_interest; /* Server on which this resides */ unsigned int cb_s_break; /* Mass break counter on ->server */ unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */ time64_t cb_expires_at; /* time at which callback expires */ - unsigned cb_version; /* callback version */ - afs_callback_type_t cb_type; /* type of callback */ }; static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode) @@ -755,6 +777,7 @@ struct afs_vl_cursor { * Cursor for iterating over a set of fileservers. */ struct afs_fs_cursor { + const struct afs_call_type *type; /* Type of call done */ struct afs_addr_cursor ac; struct afs_vnode *vnode; struct afs_server_list *server_list; /* Current server list (pins ref) */ @@ -772,6 +795,7 @@ struct afs_fs_cursor { #define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ #define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ #define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ +#define AFS_FS_CURSOR_INTR 0x0040 /* Set if op is interruptible */ unsigned short nr_iterations; /* Number of server iterations */ }; @@ -882,7 +906,6 @@ extern const struct address_space_operations afs_dir_aops; extern const struct dentry_operations afs_fs_dentry_operations; extern void afs_d_release(struct dentry *); -extern int afs_dir_remove_link(struct dentry *, struct key *, unsigned long, unsigned long); /* * dir_edit.c @@ -940,50 +963,48 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -#define AFS_VNODE_NOT_YET_SET 0x01 -#define AFS_VNODE_META_CHANGED 0x02 -#define AFS_VNODE_DATA_CHANGED 0x04 -extern void afs_update_inode_from_status(struct afs_vnode *, struct afs_file_status *, - const afs_dataversion_t *, u8); - -extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool); +extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *, + struct afs_volsync *); extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); -extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); -extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64, - struct afs_fid *, struct afs_file_status *, struct afs_callback *); -extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64); -extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); -extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, - struct afs_fid *, struct afs_file_status *); +extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *); +extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, + struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *); +extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, + struct afs_status_cb *); +extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, + struct afs_status_cb *, struct afs_status_cb *); +extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, + struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *); extern int afs_fs_rename(struct afs_fs_cursor *, const char *, - struct afs_vnode *, const char *, u64, u64); + struct afs_vnode *, const char *, + struct afs_status_cb *, struct afs_status_cb *); extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, - pgoff_t, pgoff_t, unsigned, unsigned); -extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); + pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *); +extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *); extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); -extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); -extern int afs_fs_extend_lock(struct afs_fs_cursor *); -extern int afs_fs_release_lock(struct afs_fs_cursor *); +extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *); +extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *); +extern int afs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *); extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); extern struct afs_call *afs_fs_get_capabilities(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *, unsigned int); extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, unsigned int, - struct afs_volsync *); + struct afs_fid *, struct afs_status_cb *, + unsigned int, struct afs_volsync *); extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, struct afs_volsync *); + struct afs_fid *, struct afs_status_cb *, + struct afs_volsync *); struct afs_acl { u32 size; u8 data[]; }; -extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *); -extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *); +extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *, struct afs_status_cb *); +extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *, + struct afs_status_cb *); /* * fs_probe.c @@ -995,15 +1016,20 @@ extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); /* * inode.c */ -extern int afs_fetch_status(struct afs_vnode *, struct key *, bool); +extern void afs_vnode_commit_status(struct afs_fs_cursor *, + struct afs_vnode *, + unsigned int, + const afs_dataversion_t *, + struct afs_status_cb *); +extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *); extern int afs_iget5_test(struct inode *, void *); extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); extern struct inode *afs_iget(struct super_block *, struct key *, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, + struct afs_iget_data *, struct afs_status_cb *, struct afs_cb_interest *, struct afs_vnode *); extern void afs_zap_data(struct afs_vnode *); +extern bool afs_check_validity(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int afs_setattr(struct dentry *, struct iattr *); @@ -1096,7 +1122,7 @@ static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {} * rotate.c */ extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *, - struct key *); + struct key *, bool); extern bool afs_select_fileserver(struct afs_fs_cursor *); extern bool afs_select_current_fileserver(struct afs_fs_cursor *); extern int afs_end_vnode_operation(struct afs_fs_cursor *); @@ -1121,6 +1147,12 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, bool); extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause); +static inline void afs_set_fc_call(struct afs_call *call, struct afs_fs_cursor *fc) +{ + call->intr = fc->flags & AFS_FS_CURSOR_INTR; + fc->type = call->type; +} + static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size) { call->kvec[0].iov_base = buf; @@ -1201,7 +1233,8 @@ static inline void afs_set_call_complete(struct afs_call *call, */ extern void afs_put_permits(struct afs_permits *); extern void afs_clear_permits(struct afs_vnode *); -extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int); +extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int, + struct afs_status_cb *); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *); @@ -1327,7 +1360,6 @@ extern int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata); extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); -extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_fsync(struct file *, loff_t, loff_t, int); extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf); @@ -1343,33 +1375,36 @@ extern ssize_t afs_listxattr(struct dentry *, char *, size_t); /* * yfsclient.c */ -extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool); -extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); -extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64, - struct afs_fid *, struct afs_file_status *, struct afs_callback *); -extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64, - struct afs_fid *, struct afs_file_status *, struct afs_callback *); -extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); -extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64); -extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); -extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, - struct afs_fid *, struct afs_file_status *); -extern int yfs_fs_rename(struct afs_fs_cursor *, const char *, - struct afs_vnode *, const char *, u64, u64); +extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *, + struct afs_volsync *); +extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *); +extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *, + struct afs_fid *, struct afs_status_cb *); +extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *, + struct afs_fid *, struct afs_status_cb *); +extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, + struct afs_status_cb *, struct afs_status_cb *); +extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, + struct afs_status_cb *); +extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, + struct afs_status_cb *, struct afs_status_cb *); +extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, + struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *); +extern int yfs_fs_rename(struct afs_fs_cursor *, const char *, struct afs_vnode *, const char *, + struct afs_status_cb *, struct afs_status_cb *); extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *, - pgoff_t, pgoff_t, unsigned, unsigned); -extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *); + pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *); +extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *); extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); -extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); -extern int yfs_fs_extend_lock(struct afs_fs_cursor *); -extern int yfs_fs_release_lock(struct afs_fs_cursor *); +extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *); +extern int yfs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *); +extern int yfs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *); extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, struct afs_volsync *); + struct afs_fid *, struct afs_status_cb *, + struct afs_volsync *); extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, unsigned int, - struct afs_volsync *); + struct afs_fid *, struct afs_status_cb *, + unsigned int, struct afs_volsync *); struct yfs_acl { struct afs_acl *acl; /* Dir/file/symlink ACL */ @@ -1382,8 +1417,10 @@ struct yfs_acl { }; extern void yfs_free_opaque_acl(struct yfs_acl *); -extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, unsigned int); -extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *); +extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, struct yfs_acl *, + struct afs_status_cb *); +extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *, + struct afs_status_cb *); /* * Miscellaneous inline functions. @@ -1398,14 +1435,6 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) return &vnode->vfs_inode; } -static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc, - struct afs_vnode *vnode, - unsigned int cb_break) -{ - if (fc->ac.error == 0) - afs_cache_permit(vnode, fc->key, cb_break); -} - static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, struct afs_vnode *vnode) { diff --git a/fs/afs/proc.c b/fs/afs/proc.c index be2ee3bbd0a9..371501d28e08 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -53,7 +53,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) seq_printf(m, "%3u %6lld %2u %s\n", atomic_read(&cell->usage), cell->dns_expiry - ktime_get_real_seconds(), - vllist ? vllist->nr_servers : 0, + vllist->nr_servers, cell->name); return 0; } @@ -296,8 +296,8 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) if (v == SEQ_START_TOKEN) { seq_printf(m, "# source %s, status %s\n", - dns_record_sources[vllist->source], - dns_lookup_statuses[vllist->status]); + dns_record_sources[vllist ? vllist->source : 0], + dns_lookup_statuses[vllist ? vllist->status : 0]); return 0; } @@ -336,7 +336,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) if (pos == 0) return SEQ_START_TOKEN; - if (!vllist || pos - 1 >= vllist->nr_servers) + if (pos - 1 >= vllist->nr_servers) return NULL; return &vllist->servers[pos - 1]; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index c3ae324781f8..b00c739e0e63 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -25,7 +25,7 @@ * them here also using the io_lock. */ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - struct key *key) + struct key *key, bool intr) { memset(fc, 0, sizeof(*fc)); fc->vnode = vnode; @@ -33,10 +33,15 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode fc->ac.error = SHRT_MAX; fc->error = -EDESTADDRREQ; - if (mutex_lock_interruptible(&vnode->io_lock) < 0) { - fc->error = -EINTR; - fc->flags |= AFS_FS_CURSOR_STOP; - return false; + if (intr) { + fc->flags |= AFS_FS_CURSOR_INTR; + if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + fc->error = -EINTR; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + } else { + mutex_lock(&vnode->io_lock); } if (vnode->lock_state != AFS_VNODE_LOCK_NONE) @@ -61,7 +66,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, fc->untried = (1UL << fc->server_list->nr_servers) - 1; fc->index = READ_ONCE(fc->server_list->preferred); - cbi = vnode->cb_interest; + cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock)); if (cbi) { /* See if the vnode's preferred record is still available */ for (i = 0; i < fc->server_list->nr_servers; i++) { @@ -82,8 +88,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, /* Note that the callback promise is effectively broken */ write_seqlock(&vnode->cb_lock); - ASSERTCMP(cbi, ==, vnode->cb_interest); - vnode->cb_interest = NULL; + ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest)); + rcu_assign_pointer(vnode->cb_interest, NULL); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) vnode->cb_break++; write_sequnlock(&vnode->cb_lock); @@ -118,10 +124,14 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) */ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) { - msleep_interruptible(1000); - if (signal_pending(current)) { - fc->error = -ERESTARTSYS; - return false; + if (fc->flags & AFS_FS_CURSOR_INTR) { + msleep_interruptible(1000); + if (signal_pending(current)) { + fc->error = -ERESTARTSYS; + return false; + } + } else { + msleep(1000); } return true; @@ -408,7 +418,9 @@ selected_server: if (error < 0) goto failed_set_error; - fc->cbi = afs_get_cb_interest(vnode->cb_interest); + fc->cbi = afs_get_cb_interest( + rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock))); read_lock(&server->fs_lock); alist = rcu_dereference_protected(server->addresses, @@ -459,6 +471,8 @@ no_more_servers: s->probe.abort_code); } + error = e.error; + failed_set_error: fc->error = error; failed: @@ -476,12 +490,15 @@ failed: bool afs_select_current_fileserver(struct afs_fs_cursor *fc) { struct afs_vnode *vnode = fc->vnode; - struct afs_cb_interest *cbi = vnode->cb_interest; + struct afs_cb_interest *cbi; struct afs_addr_list *alist; int error = fc->ac.error; _enter(""); + cbi = rcu_dereference_protected(vnode->cb_interest, + lockdep_is_held(&vnode->io_lock)); + switch (error) { case SHRT_MAX: if (!cbi) { @@ -490,7 +507,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) return false; } - fc->cbi = afs_get_cb_interest(vnode->cb_interest); + fc->cbi = afs_get_cb_interest(cbi); read_lock(&cbi->server->fs_lock); alist = rcu_dereference_protected(cbi->server->addresses, diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a34a89c75c6a..4fa5ce92b9b9 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -188,7 +188,7 @@ void afs_put_call(struct afs_call *call) if (call->type->destructor) call->type->destructor(call); - afs_put_server(call->net, call->cm_server); + afs_put_server(call->net, call->server); afs_put_cb_interest(call->net, call->cbi); afs_put_addrlist(call->alist); kfree(call->request); @@ -417,6 +417,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade, + call->intr, call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -426,6 +427,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) call->rxcall = rxcall; + if (call->max_lifespan) + rxrpc_kernel_set_max_life(call->net->socket, rxcall, + call->max_lifespan); + /* send the request */ iov[0].iov_base = call->request; iov[0].iov_len = call->request_size; @@ -529,11 +534,11 @@ static void afs_deliver_to_call(struct afs_call *call) return; } - if (call->want_reply_time && + if (!call->have_reply_time && rxrpc_kernel_get_reply_time(call->net->socket, call->rxcall, &call->reply_time)) - call->want_reply_time = false; + call->have_reply_time = true; ret = call->type->deliver(call); state = READ_ONCE(call->state); @@ -648,7 +653,7 @@ long afs_wait_for_call_to_complete(struct afs_call *call, break; } - if (timeout == 0 && + if (call->intr && timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) break; @@ -691,10 +696,9 @@ long afs_wait_for_call_to_complete(struct afs_call *call, ret = ac->error; switch (ret) { case 0: - if (call->ret_reply0) { - ret = (long)call->reply[0]; - call->reply[0] = NULL; - } + ret = call->ret0; + call->ret0 = 0; + /* Fall through */ case -ECONNABORTED: ac->responded = true; diff --git a/fs/afs/security.c b/fs/afs/security.c index 5f58a9a17e69..5d8ece98561e 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -87,11 +87,9 @@ void afs_clear_permits(struct afs_vnode *vnode) permits = rcu_dereference_protected(vnode->permit_cache, lockdep_is_held(&vnode->lock)); RCU_INIT_POINTER(vnode->permit_cache, NULL); - vnode->cb_break++; spin_unlock(&vnode->lock); - if (permits) - afs_put_permits(permits); + afs_put_permits(permits); } /* @@ -118,10 +116,10 @@ static void afs_hash_permits(struct afs_permits *permits) * as the ACL *may* have changed. */ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, - unsigned int cb_break) + unsigned int cb_break, struct afs_status_cb *scb) { struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL; - afs_access_t caller_access = READ_ONCE(vnode->status.caller_access); + afs_access_t caller_access = scb->status.caller_access; size_t size = 0; bool changed = false; int i, j; @@ -148,7 +146,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } if (afs_cb_is_broken(cb_break, vnode, - vnode->cb_interest)) { + rcu_dereference(vnode->cb_interest))) { changed = true; break; } @@ -178,7 +176,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest)) + if (afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest))) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -255,14 +253,16 @@ found: kfree(new); + rcu_read_lock(); spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) && + if (!afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else zap = replacement; spin_unlock(&vnode->lock); + rcu_read_unlock(); afs_put_permits(zap); out_put: afs_put_permits(permits); @@ -322,13 +322,12 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, */ _debug("no valid permit"); - ret = afs_fetch_status(vnode, key, false); + ret = afs_fetch_status(vnode, key, false, _access); if (ret < 0) { *_access = 0; _leave(" = %d", ret); return ret; } - *_access = vnode->status.caller_access; } _leave(" = 0 [access %x]", *_access); diff --git a/fs/afs/server.c b/fs/afs/server.c index 65b33b6da48b..52c170b59cfd 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -521,8 +521,15 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, &server->uuid); if (IS_ERR(alist)) { - fc->ac.error = PTR_ERR(alist); - _leave(" = f [%d]", fc->ac.error); + if ((PTR_ERR(alist) == -ERESTARTSYS || + PTR_ERR(alist) == -EINTR) && + !(fc->flags & AFS_FS_CURSOR_INTR) && + server->addresses) { + _leave(" = t [intr]"); + return true; + } + fc->error = PTR_ERR(alist); + _leave(" = f [%d]", fc->error); return false; } @@ -574,7 +581,11 @@ retry: ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, TASK_INTERRUPTIBLE); if (ret == -ERESTARTSYS) { - fc->ac.error = ret; + if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) { + _leave(" = t [intr]"); + return true; + } + fc->error = ret; _leave(" = f [intr]"); return false; } diff --git a/fs/afs/super.c b/fs/afs/super.c index 783c68cd1a35..f18911e8d770 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -426,7 +426,7 @@ static int afs_set_super(struct super_block *sb, struct fs_context *fc) static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) { struct afs_super_info *as = AFS_FS_S(sb); - struct afs_fid fid; + struct afs_iget_data iget_data; struct inode *inode = NULL; int ret; @@ -451,11 +451,13 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) } else { sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); - fid.vid = as->volume->vid; - fid.vnode = 1; - fid.vnode_hi = 0; - fid.unique = 1; - inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL, NULL); + iget_data.fid.vid = as->volume->vid; + iget_data.fid.vnode = 1; + iget_data.fid.vnode_hi = 0; + iget_data.fid.unique = 1; + iget_data.cb_v_break = as->volume->cb_v_break; + iget_data.cb_s_break = 0; + inode = afs_iget(sb, ctx->key, &iget_data, NULL, NULL, NULL); } if (IS_ERR(inode)) @@ -677,13 +679,12 @@ static struct inode *afs_alloc_inode(struct super_block *sb) vnode->volume = NULL; vnode->lock_key = NULL; vnode->permit_cache = NULL; - vnode->cb_interest = NULL; + RCU_INIT_POINTER(vnode->cb_interest, NULL); #ifdef CONFIG_AFS_FSCACHE vnode->cache = NULL; #endif vnode->flags = 1 << AFS_VNODE_UNSET; - vnode->cb_type = 0; vnode->lock_state = AFS_VNODE_LOCK_NONE; init_rwsem(&vnode->rmdir_lock); @@ -708,7 +709,7 @@ static void afs_destroy_inode(struct inode *inode) _debug("DESTROY INODE %p", inode); - ASSERTCMP(vnode->cb_interest, ==, NULL); + ASSERTCMP(rcu_access_pointer(vnode->cb_interest), ==, NULL); atomic_dec(&afs_count_active_inodes); } @@ -741,7 +742,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) return PTR_ERR(key); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); @@ -749,7 +750,6 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) } afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); } diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index b4f1a84519b9..61e25010ff33 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -232,18 +232,16 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, if (bs.status > NR__dns_lookup_status) bs.status = NR__dns_lookup_status; + /* See if we can update an old server record */ server = NULL; - if (previous) { - /* See if we can update an old server record */ - for (i = 0; i < previous->nr_servers; i++) { - struct afs_vlserver *p = previous->servers[i].server; - - if (p->name_len == bs.name_len && - p->port == bs.port && - strncasecmp(b, p->name, bs.name_len) == 0) { - server = afs_get_vlserver(p); - break; - } + for (i = 0; i < previous->nr_servers; i++) { + struct afs_vlserver *p = previous->servers[i].server; + + if (p->name_len == bs.name_len && + p->port == bs.port && + strncasecmp(b, p->name, bs.name_len) == 0) { + server = afs_get_vlserver(p); + break; } } diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index b05e0de04f42..beb991563939 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -33,8 +33,8 @@ static bool afs_vl_probe_done(struct afs_vlserver *server) void afs_vlserver_probe_result(struct afs_call *call) { struct afs_addr_list *alist = call->alist; - struct afs_vlserver *server = call->reply[0]; - unsigned int server_index = (long)call->reply[1]; + struct afs_vlserver *server = call->vlserver; + unsigned int server_index = call->server_index; unsigned int index = call->addr_ix; unsigned int rtt = UINT_MAX; bool have_result = false; diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 7adde83a0648..3f845489a9f0 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -43,11 +43,29 @@ bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cel static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) { struct afs_cell *cell = vc->cell; + unsigned int dns_lookup_count; + + if (cell->dns_source == DNS_RECORD_UNAVAILABLE || + cell->dns_expiry <= ktime_get_real_seconds()) { + dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); + set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); + queue_work(afs_wq, &cell->manager); + + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { + if (wait_var_event_interruptible( + &cell->dns_lookup_count, + smp_load_acquire(&cell->dns_lookup_count) + != dns_lookup_count) < 0) { + vc->error = -ERESTARTSYS; + return false; + } + } - if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, - TASK_INTERRUPTIBLE)) { - vc->error = -ERESTARTSYS; - return false; + /* Status load is ordered after lookup counter load */ + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { + vc->error = -EDESTADDRREQ; + return false; + } } read_lock(&cell->vl_servers_lock); @@ -55,7 +73,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) rcu_dereference_protected(cell->vl_servers, lockdep_is_held(&cell->vl_servers_lock))); read_unlock(&cell->vl_servers_lock); - if (!vc->server_list || !vc->server_list->nr_servers) + if (!vc->server_list->nr_servers) return false; vc->untried = (1UL << vc->server_list->nr_servers) - 1; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index dd9ba4e96fb3..3d4b9836a2e2 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -34,7 +34,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) /* unmarshall the reply once we've received all of it */ uvldb = call->buffer; - entry = call->reply[0]; + entry = call->ret_vldb; nr_servers = ntohl(uvldb->nServers); if (nr_servers > AFS_NMAXNSERVERS) @@ -110,7 +110,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call) { - kfree(call->reply[0]); + kfree(call->ret_vldb); afs_flat_call_destructor(call); } @@ -155,8 +155,8 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, } call->key = vc->key; - call->reply[0] = entry; - call->ret_reply0 = true; + call->ret_vldb = entry; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; /* Marshall the parameters */ bp = call->request; @@ -214,7 +214,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) if (!alist) return -ENOMEM; alist->version = uniquifier; - call->reply[0] = alist; + call->ret_alist = alist; call->count = count; call->count2 = nentries; call->unmarshall++; @@ -229,7 +229,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) if (ret < 0) return ret; - alist = call->reply[0]; + alist = call->ret_alist; bp = call->buffer; count = min(call->count, 4U); for (i = 0; i < count; i++) @@ -249,8 +249,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) static void afs_vl_get_addrs_u_destructor(struct afs_call *call) { - afs_put_server(call->net, (struct afs_server *)call->reply[0]); - kfree(call->reply[1]); + afs_put_addrlist(call->ret_alist); return afs_flat_call_destructor(call); } @@ -287,8 +286,8 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, return ERR_PTR(-ENOMEM); call->key = vc->key; - call->reply[0] = NULL; - call->ret_reply0 = true; + call->ret_alist = NULL; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; /* Marshall the parameters */ bp = call->request; @@ -358,9 +357,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) static void afs_destroy_vl_get_capabilities(struct afs_call *call) { - struct afs_vlserver *server = call->reply[0]; - - afs_put_vlserver(call->net, server); + afs_put_vlserver(call->net, call->vlserver); afs_flat_call_destructor(call); } @@ -398,11 +395,11 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net, return ERR_PTR(-ENOMEM); call->key = key; - call->reply[0] = afs_get_vlserver(server); - call->reply[1] = (void *)(long)server_index; + call->vlserver = afs_get_vlserver(server); + call->server_index = server_index; call->upgrade = true; - call->want_reply_time = true; call->async = true; + call->max_lifespan = AFS_PROBE_MAX_LIFESPAN; /* marshall the parameters */ bp = call->request; @@ -460,7 +457,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) if (!alist) return -ENOMEM; alist->version = uniquifier; - call->reply[0] = alist; + call->ret_alist = alist; if (call->count == 0) goto extract_volendpoints; @@ -488,7 +485,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) if (ret < 0) return ret; - alist = call->reply[0]; + alist = call->ret_alist; bp = call->buffer; switch (call->count2) { case YFS_ENDPOINT_IPV4: @@ -609,7 +606,6 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) break; } - alist = call->reply[0]; _leave(" = 0 [done]"); return 0; } @@ -644,8 +640,8 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, return ERR_PTR(-ENOMEM); call->key = vc->key; - call->reply[0] = NULL; - call->ret_reply0 = true; + call->ret_alist = NULL; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; /* Marshall the parameters */ bp = call->request; diff --git a/fs/afs/write.c b/fs/afs/write.c index 0122d7445fba..8bcab95f1127 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -314,6 +314,46 @@ static void afs_redirty_pages(struct writeback_control *wbc, } /* + * completion of write to server + */ +static void afs_pages_written_back(struct afs_vnode *vnode, + pgoff_t first, pgoff_t last) +{ + struct pagevec pv; + unsigned long priv; + unsigned count, loop; + + _enter("{%llx:%llu},{%lx-%lx}", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv); + + do { + _debug("done %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, + first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + priv = page_private(pv.pages[loop]); + trace_afs_page_dirty(vnode, tracepoint_string("clear"), + pv.pages[loop]->index, priv); + set_page_private(pv.pages[loop], 0); + end_page_writeback(pv.pages[loop]); + } + first += count; + __pagevec_release(&pv); + } while (first <= last); + + afs_prune_wb_keys(vnode); + _leave(""); +} + +/* * write to a file */ static int afs_store_data(struct address_space *mapping, @@ -322,6 +362,7 @@ static int afs_store_data(struct address_space *mapping, { struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_wb_key *wbk = NULL; struct list_head *p; int ret = -ENOKEY, ret2; @@ -333,6 +374,10 @@ static int afs_store_data(struct address_space *mapping, vnode->fid.unique, first, last, offset, to); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); + if (!scb) + return -ENOMEM; + spin_lock(&vnode->wb_lock); p = vnode->wb_keys.next; @@ -351,6 +396,7 @@ try_next_key: spin_unlock(&vnode->wb_lock); afs_put_wb_key(wbk); + kfree(scb); _leave(" = %d [no keys]", ret); return ret; @@ -361,14 +407,19 @@ found_key: _debug("USE WB KEY %u", key_serial(wbk->key)); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { + if (afs_begin_vnode_operation(&fc, vnode, wbk->key, false)) { + afs_dataversion_t data_version = vnode->status.data_version + 1; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_store_data(&fc, mapping, first, last, offset, to); + afs_fs_store_data(&fc, mapping, first, last, offset, to, scb); } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_check_for_remote_deletion(&fc, vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); + if (fc.ac.error == 0) + afs_pages_written_back(vnode, first, last); ret = afs_end_vnode_operation(&fc); } @@ -393,6 +444,7 @@ found_key: } afs_put_wb_key(wbk); + kfree(scb); _leave(" = %d", ret); return ret; } @@ -679,46 +731,6 @@ int afs_writepages(struct address_space *mapping, } /* - * completion of write to server - */ -void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) -{ - struct pagevec pv; - unsigned long priv; - unsigned count, loop; - pgoff_t first = call->first, last = call->last; - - _enter("{%llx:%llu},{%lx-%lx}", - vnode->fid.vid, vnode->fid.vnode, first, last); - - pagevec_init(&pv); - - do { - _debug("done %lx-%lx", first, last); - - count = last - first + 1; - if (count > PAGEVEC_SIZE) - count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, - first, count, pv.pages); - ASSERTCMP(pv.nr, ==, count); - - for (loop = 0; loop < count; loop++) { - priv = page_private(pv.pages[loop]); - trace_afs_page_dirty(vnode, tracepoint_string("clear"), - pv.pages[loop]->index, priv); - set_page_private(pv.pages[loop], 0); - end_page_writeback(pv.pages[loop]); - } - first += count; - __pagevec_release(&pv); - } while (first <= last); - - afs_prune_wb_keys(vnode); - _leave(""); -} - -/* * write to an AFS file */ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index c81f85003fc7..17f58fea7ec1 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -47,40 +47,52 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler, void *buffer, size_t size) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_acl *acl = NULL; struct key *key; - int ret; + int ret = -ENOMEM; + + scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); + if (!scb) + goto error; key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) - return PTR_ERR(key); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error_scb; + } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { + afs_dataversion_t data_version = vnode->status.data_version; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - acl = afs_fs_fetch_acl(&fc); + acl = afs_fs_fetch_acl(&fc, scb); } afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); ret = afs_end_vnode_operation(&fc); } if (ret == 0) { ret = acl->size; if (size > 0) { - ret = -ERANGE; - if (acl->size > size) - return -ERANGE; - memcpy(buffer, acl->data, acl->size); - ret = acl->size; + if (acl->size <= size) + memcpy(buffer, acl->data, acl->size); + else + ret = -ERANGE; } kfree(acl); } key_put(key); +error_scb: + kfree(scb); +error: return ret; } @@ -93,41 +105,53 @@ static int afs_xattr_set_acl(const struct xattr_handler *handler, const void *buffer, size_t size, int flags) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_acl *acl = NULL; struct key *key; - int ret; + int ret = -ENOMEM; if (flags == XATTR_CREATE) return -EINVAL; - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) - return PTR_ERR(key); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); + if (!scb) + goto error; acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL); - if (!acl) { - key_put(key); - return -ENOMEM; + if (!acl) + goto error_scb; + + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error_acl; } acl->size = size; memcpy(acl->data, buffer, size); ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { + afs_dataversion_t data_version = vnode->status.data_version; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_store_acl(&fc, acl); + afs_fs_store_acl(&fc, acl, scb); } afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); ret = afs_end_vnode_operation(&fc); } - kfree(acl); key_put(key); +error_acl: + kfree(acl); +error_scb: + kfree(scb); +error: return ret; } @@ -146,12 +170,12 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, void *buffer, size_t size) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_vnode *vnode = AFS_FS_I(inode); struct yfs_acl *yacl = NULL; struct key *key; - unsigned int flags = 0; char buf[16], *data; - int which = 0, dsize, ret; + int which = 0, dsize, ret = -ENOMEM; if (strcmp(name, "acl") == 0) which = 0; @@ -164,65 +188,81 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, else return -EOPNOTSUPP; + yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL); + if (!yacl) + goto error; + if (which == 0) - flags |= YFS_ACL_WANT_ACL; + yacl->flags |= YFS_ACL_WANT_ACL; else if (which == 3) - flags |= YFS_ACL_WANT_VOL_ACL; + yacl->flags |= YFS_ACL_WANT_VOL_ACL; + + scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); + if (!scb) + goto error_yacl; key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) - return PTR_ERR(key); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error_scb; + } ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { + afs_dataversion_t data_version = vnode->status.data_version; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - yacl = yfs_fs_fetch_opaque_acl(&fc, flags); + yfs_fs_fetch_opaque_acl(&fc, yacl, scb); } afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); ret = afs_end_vnode_operation(&fc); } - if (ret == 0) { - switch (which) { - case 0: - data = yacl->acl->data; - dsize = yacl->acl->size; - break; - case 1: - data = buf; - dsize = snprintf(buf, sizeof(buf), "%u", - yacl->inherit_flag); - break; - case 2: - data = buf; - dsize = snprintf(buf, sizeof(buf), "%u", - yacl->num_cleaned); - break; - case 3: - data = yacl->vol_acl->data; - dsize = yacl->vol_acl->size; - break; - default: - ret = -EOPNOTSUPP; - goto out; - } + if (ret < 0) + goto error_key; + + switch (which) { + case 0: + data = yacl->acl->data; + dsize = yacl->acl->size; + break; + case 1: + data = buf; + dsize = snprintf(buf, sizeof(buf), "%u", yacl->inherit_flag); + break; + case 2: + data = buf; + dsize = snprintf(buf, sizeof(buf), "%u", yacl->num_cleaned); + break; + case 3: + data = yacl->vol_acl->data; + dsize = yacl->vol_acl->size; + break; + default: + ret = -EOPNOTSUPP; + goto error_key; + } - ret = dsize; - if (size > 0) { - if (dsize > size) { - ret = -ERANGE; - goto out; - } - memcpy(buffer, data, dsize); + ret = dsize; + if (size > 0) { + if (dsize > size) { + ret = -ERANGE; + goto error_key; } + memcpy(buffer, data, dsize); } -out: - yfs_free_opaque_acl(yacl); +error_key: key_put(key); +error_scb: + kfree(scb); +error_yacl: + yfs_free_opaque_acl(yacl); +error: return ret; } @@ -235,42 +275,54 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, const void *buffer, size_t size, int flags) { struct afs_fs_cursor fc; + struct afs_status_cb *scb; struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_acl *acl = NULL; struct key *key; - int ret; + int ret = -ENOMEM; if (flags == XATTR_CREATE || strcmp(name, "acl") != 0) return -EINVAL; - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) - return PTR_ERR(key); + scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); + if (!scb) + goto error; acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL); - if (!acl) { - key_put(key); - return -ENOMEM; - } + if (!acl) + goto error_scb; acl->size = size; memcpy(acl->data, buffer, size); + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error_acl; + } + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key)) { + if (afs_begin_vnode_operation(&fc, vnode, key, true)) { + afs_dataversion_t data_version = vnode->status.data_version; + while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(vnode); - yfs_fs_store_opaque_acl2(&fc, acl); + yfs_fs_store_opaque_acl2(&fc, acl, scb); } afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break); + afs_vnode_commit_status(&fc, vnode, fc.cb_break, + &data_version, scb); ret = afs_end_vnode_operation(&fc); } +error_acl: kfree(acl); key_put(key); +error_scb: + kfree(scb); +error: return ret; } diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 6cf7d161baa1..10de675dc6fc 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -183,24 +183,19 @@ static void xdr_dump_bad(const __be32 *bp) /* * Decode a YFSFetchStatus block */ -static int xdr_decode_YFSFetchStatus(struct afs_call *call, - const __be32 **_bp, - struct afs_file_status *status, - struct afs_vnode *vnode, - const afs_dataversion_t *expected_version, - struct afs_read *read_req) +static int xdr_decode_YFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; + struct afs_file_status *status = &scb->status; u32 type; - u8 flags = 0; status->abort_code = ntohl(xdr->abort_code); if (status->abort_code != 0) { - if (vnode && status->abort_code == VNOVNODE) { - set_bit(AFS_VNODE_DELETED, &vnode->flags); + if (status->abort_code == VNOVNODE) status->nlink = 0; - __afs_break_callback(vnode); - } + scb->have_error = true; return 0; } @@ -209,77 +204,28 @@ static int xdr_decode_YFSFetchStatus(struct afs_call *call, case AFS_FTYPE_FILE: case AFS_FTYPE_DIR: case AFS_FTYPE_SYMLINK: - if (type != status->type && - vnode && - !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n", - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - status->type, type); - goto bad; - } status->type = type; break; default: goto bad; } -#define EXTRACT_M4(FIELD) \ - do { \ - u32 x = ntohl(xdr->FIELD); \ - if (status->FIELD != x) { \ - flags |= AFS_VNODE_META_CHANGED; \ - status->FIELD = x; \ - } \ - } while (0) - -#define EXTRACT_M8(FIELD) \ - do { \ - u64 x = xdr_to_u64(xdr->FIELD); \ - if (status->FIELD != x) { \ - flags |= AFS_VNODE_META_CHANGED; \ - status->FIELD = x; \ - } \ - } while (0) - -#define EXTRACT_D8(FIELD) \ - do { \ - u64 x = xdr_to_u64(xdr->FIELD); \ - if (status->FIELD != x) { \ - flags |= AFS_VNODE_DATA_CHANGED; \ - status->FIELD = x; \ - } \ - } while (0) - - EXTRACT_M4(nlink); - EXTRACT_D8(size); - EXTRACT_D8(data_version); - EXTRACT_M8(author); - EXTRACT_M8(owner); - EXTRACT_M8(group); - EXTRACT_M4(mode); - EXTRACT_M4(caller_access); /* call ticket dependent */ - EXTRACT_M4(anon_access); - - status->mtime_client = xdr_to_time(xdr->mtime_client); - status->mtime_server = xdr_to_time(xdr->mtime_server); - status->lock_count = ntohl(xdr->lock_count); - - if (read_req) { - read_req->data_version = status->data_version; - read_req->file_size = status->size; - } + status->nlink = ntohl(xdr->nlink); + status->author = xdr_to_u64(xdr->author); + status->owner = xdr_to_u64(xdr->owner); + status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */ + status->anon_access = ntohl(xdr->anon_access); + status->mode = ntohl(xdr->mode) & S_IALLUGO; + status->group = xdr_to_u64(xdr->group); + status->lock_count = ntohl(xdr->lock_count); + + status->mtime_client = xdr_to_time(xdr->mtime_client); + status->mtime_server = xdr_to_time(xdr->mtime_server); + status->size = xdr_to_u64(xdr->size); + status->data_version = xdr_to_u64(xdr->data_version); + scb->have_status = true; *_bp += xdr_size(xdr); - - if (vnode) { - if (test_bit(AFS_VNODE_UNSET, &vnode->flags)) - flags |= AFS_VNODE_NOT_YET_SET; - afs_update_inode_from_status(vnode, status, expected_version, - flags); - } - return 0; bad: @@ -288,73 +234,20 @@ bad: } /* - * Decode the file status. We need to lock the target vnode if we're going to - * update its status so that stat() sees the attributes update atomically. - */ -static int yfs_decode_status(struct afs_call *call, - const __be32 **_bp, - struct afs_file_status *status, - struct afs_vnode *vnode, - const afs_dataversion_t *expected_version, - struct afs_read *read_req) -{ - int ret; - - if (!vnode) - return xdr_decode_YFSFetchStatus(call, _bp, status, vnode, - expected_version, read_req); - - write_seqlock(&vnode->cb_lock); - ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode, - expected_version, read_req); - write_sequnlock(&vnode->cb_lock); - return ret; -} - -/* * Decode a YFSCallBack block */ -static void xdr_decode_YFSCallBack(struct afs_call *call, - struct afs_vnode *vnode, - const __be32 **_bp) -{ - struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp; - struct afs_cb_interest *old, *cbi = call->cbi; - u64 cb_expiry; - - write_seqlock(&vnode->cb_lock); - - if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { - cb_expiry = xdr_to_u64(xdr->expiration_time); - do_div(cb_expiry, 10 * 1000 * 1000); - vnode->cb_version = ntohl(xdr->version); - vnode->cb_type = ntohl(xdr->type); - vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); - old = vnode->cb_interest; - if (old != call->cbi) { - vnode->cb_interest = cbi; - cbi = old; - } - set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - } - - write_sequnlock(&vnode->cb_lock); - call->cbi = cbi; - *_bp += xdr_size(xdr); -} - -static void xdr_decode_YFSCallBack_raw(const __be32 **_bp, - struct afs_callback *cb) +static void xdr_decode_YFSCallBack(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { struct yfs_xdr_YFSCallBack *x = (void *)*_bp; - u64 cb_expiry; - - cb_expiry = xdr_to_u64(x->expiration_time); - do_div(cb_expiry, 10 * 1000 * 1000); - cb->version = ntohl(x->version); - cb->type = ntohl(x->type); - cb->expires_at = cb_expiry + ktime_get_real_seconds(); + struct afs_callback *cb = &scb->callback; + ktime_t cb_expiry; + cb_expiry = call->reply_time; + cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100); + cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC); + scb->have_cb = true; *_bp += xdr_size(x); } @@ -442,11 +335,10 @@ static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp, } /* - * deliver reply data to an FS.FetchStatus + * Deliver a reply that's a status, callback and volsync. */ -static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call) +static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -454,16 +346,36 @@ static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call) if (ret < 0) return ret; - _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - xdr_decode_YFSCallBack(call, vnode, &bp); - xdr_decode_YFSVolSync(&bp, call->reply[1]); + xdr_decode_YFSCallBack(&bp, call, call->out_scb); + xdr_decode_YFSVolSync(&bp, call->out_volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * Deliver reply data to operations that just return a file status and a volume + * sync record. + */ +static int yfs_deliver_status_and_volsync(struct afs_call *call) +{ + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -475,15 +387,15 @@ static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call) static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = { .name = "YFS.FetchStatus(vnode)", .op = yfs_FS_FetchStatus, - .deliver = yfs_deliver_fs_fetch_status_vnode, + .deliver = yfs_deliver_fs_status_cb_and_volsync, .destructor = afs_flat_call_destructor, }; /* * Fetch the status information for a file. */ -int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync, - bool new_inode) +int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb, + struct afs_volsync *volsync) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -505,9 +417,8 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy } call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = volsync; - call->expected_version = new_inode ? 1 : vnode->status.data_version; + call->out_scb = scb; + call->out_volsync = volsync; /* marshall the parameters */ bp = call->request; @@ -516,9 +427,9 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy bp = xdr_encode_YFSFid(bp, &vnode->fid); yfs_check_req(call, bp); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -528,8 +439,7 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy */ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; - struct afs_read *req = call->reply[2]; + struct afs_read *req = call->read_request; const __be32 *bp; unsigned int size; int ret; @@ -586,7 +496,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) if (req->offset == PAGE_SIZE) { req->offset = 0; if (req->page_done) - req->page_done(call, req); + req->page_done(req); req->index++; if (req->remain > 0) goto begin_page; @@ -623,12 +533,14 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) return ret; bp = call->buffer; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &vnode->status.data_version, req); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - xdr_decode_YFSCallBack(call, vnode, &bp); - xdr_decode_YFSVolSync(&bp, call->reply[1]); + xdr_decode_YFSCallBack(&bp, call, call->out_scb); + xdr_decode_YFSVolSync(&bp, call->out_volsync); + + req->data_version = call->out_scb->status.data_version; + req->file_size = call->out_scb->status.size; call->unmarshall++; @@ -642,7 +554,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) zero_user_segment(req->pages[req->index], req->offset, PAGE_SIZE); if (req->page_done) - req->page_done(call, req); + req->page_done(req); req->offset = 0; } @@ -652,9 +564,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) static void yfs_fetch_data_destructor(struct afs_call *call) { - struct afs_read *req = call->reply[2]; - - afs_put_read(req); + afs_put_read(call->read_request); afs_flat_call_destructor(call); } @@ -671,7 +581,8 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = { /* * Fetch data from a file. */ -int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) +int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_status_cb *scb, + struct afs_read *req) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -693,11 +604,9 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = NULL; /* volsync */ - call->reply[2] = req; - call->expected_version = vnode->status.data_version; - call->want_reply_time = true; + call->out_scb = scb; + call->out_volsync = NULL; + call->read_request = req; /* marshall the parameters */ bp = call->request; @@ -709,9 +618,9 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) yfs_check_req(call, bp); refcount_inc(&req->usage); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -721,7 +630,6 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) */ static int yfs_deliver_fs_create_vnode(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -733,16 +641,15 @@ static int yfs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_YFSFid(&bp, call->reply[1]); - ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + xdr_decode_YFSFid(&bp, call->out_fid); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - xdr_decode_YFSCallBack_raw(&bp, call->reply[3]); - xdr_decode_YFSVolSync(&bp, NULL); + xdr_decode_YFSCallBack(&bp, call, call->out_scb); + xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -764,14 +671,13 @@ static const struct afs_call_type afs_RXFSCreateFile = { int yfs_fs_create_file(struct afs_fs_cursor *fc, const char *name, umode_t mode, - u64 current_data_version, + struct afs_status_cb *dvnode_scb, struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_callback *newcb) + struct afs_status_cb *new_scb) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, rplsz; __be32 *bp; @@ -795,24 +701,23 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = newfid; - call->reply[2] = newstatus; - call->reply[3] = newcb; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; + call->out_fid = newfid; + call->out_scb = new_scb; /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSCREATEFILE); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &dvnode->fid); bp = xdr_encode_string(bp, name, namesz); bp = xdr_encode_YFSStoreStatus_mode(bp, mode); bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */ yfs_check_req(call, bp); afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &vnode->fid, name); + trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -830,14 +735,13 @@ static const struct afs_call_type yfs_RXFSMakeDir = { int yfs_fs_make_dir(struct afs_fs_cursor *fc, const char *name, umode_t mode, - u64 current_data_version, + struct afs_status_cb *dvnode_scb, struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_callback *newcb) + struct afs_status_cb *new_scb) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, rplsz; __be32 *bp; @@ -860,23 +764,22 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = newfid; - call->reply[2] = newstatus; - call->reply[3] = newcb; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; + call->out_fid = newfid; + call->out_scb = new_scb; /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSMAKEDIR); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &dvnode->fid); bp = xdr_encode_string(bp, name, namesz); bp = xdr_encode_YFSStoreStatus_mode(bp, mode); yfs_check_req(call, bp); afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &vnode->fid, name); + trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -886,8 +789,6 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_remove_file2(struct afs_call *call) { - struct afs_vnode *dvnode = call->reply[0]; - struct afs_vnode *vnode = call->reply[1]; struct afs_fid fid; const __be32 *bp; int ret; @@ -898,20 +799,18 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; xdr_decode_YFSFid(&bp, &fid); - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; /* Was deleted if vnode->status.abort_code == VNOVNODE. */ - xdr_decode_YFSVolSync(&bp, NULL); + xdr_decode_YFSVolSync(&bp, call->out_volsync); return 0; } @@ -929,7 +828,8 @@ static const struct afs_call_type yfs_RXYFSRemoveFile2 = { * Remove a file and retrieve new file status. */ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, u64 current_data_version) + const char *name, struct afs_status_cb *dvnode_scb, + struct afs_status_cb *vnode_scb) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -954,9 +854,8 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, return -ENOMEM; call->key = fc->key; - call->reply[0] = dvnode; - call->reply[1] = vnode; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; + call->out_scb = vnode_scb; /* marshall the parameters */ bp = call->request; @@ -968,6 +867,7 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -977,7 +877,6 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int yfs_deliver_fs_remove(struct afs_call *call) { - struct afs_vnode *dvnode = call->reply[0]; const __be32 *bp; int ret; @@ -987,14 +886,12 @@ static int yfs_deliver_fs_remove(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - xdr_decode_YFSVolSync(&bp, NULL); + xdr_decode_YFSVolSync(&bp, call->out_volsync); return 0; } @@ -1019,7 +916,8 @@ static const struct afs_call_type yfs_RXYFSRemoveDir = { * remove a file or directory */ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, bool isdir, u64 current_data_version) + const char *name, bool isdir, + struct afs_status_cb *dvnode_scb) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -1042,9 +940,7 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, return -ENOMEM; call->key = fc->key; - call->reply[0] = dvnode; - call->reply[1] = vnode; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; /* marshall the parameters */ bp = call->request; @@ -1056,6 +952,7 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1065,7 +962,6 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int yfs_deliver_fs_link(struct afs_call *call) { - struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1]; const __be32 *bp; int ret; @@ -1075,16 +971,14 @@ static int yfs_deliver_fs_link(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - xdr_decode_YFSVolSync(&bp, NULL); + xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; } @@ -1103,7 +997,9 @@ static const struct afs_call_type yfs_RXYFSLink = { * Make a hard link. */ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, u64 current_data_version) + const char *name, + struct afs_status_cb *dvnode_scb, + struct afs_status_cb *vnode_scb) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -1127,9 +1023,8 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, return -ENOMEM; call->key = fc->key; - call->reply[0] = dvnode; - call->reply[1] = vnode; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; + call->out_scb = vnode_scb; /* marshall the parameters */ bp = call->request; @@ -1142,6 +1037,7 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &vnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1151,7 +1047,6 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int yfs_deliver_fs_symlink(struct afs_call *call) { - struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1163,15 +1058,14 @@ static int yfs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_YFSFid(&bp, call->reply[1]); - ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + xdr_decode_YFSFid(&bp, call->out_fid); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - xdr_decode_YFSVolSync(&bp, NULL); + xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1193,9 +1087,9 @@ static const struct afs_call_type yfs_RXYFSSymlink = { int yfs_fs_symlink(struct afs_fs_cursor *fc, const char *name, const char *contents, - u64 current_data_version, + struct afs_status_cb *dvnode_scb, struct afs_fid *newfid, - struct afs_file_status *newstatus) + struct afs_status_cb *vnode_scb) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -1222,10 +1116,9 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->reply[0] = dvnode; - call->reply[1] = newfid; - call->reply[2] = newstatus; - call->expected_version = current_data_version + 1; + call->out_dir_scb = dvnode_scb; + call->out_fid = newfid; + call->out_scb = vnode_scb; /* marshall the parameters */ bp = call->request; @@ -1239,6 +1132,7 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call1(call, &dvnode->fid, name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1248,8 +1142,6 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_rename(struct afs_call *call) { - struct afs_vnode *orig_dvnode = call->reply[0]; - struct afs_vnode *new_dvnode = call->reply[1]; const __be32 *bp; int ret; @@ -1259,20 +1151,17 @@ static int yfs_deliver_fs_rename(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (new_dvnode != orig_dvnode) { - ret = yfs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, - &call->expected_version_2, NULL); + if (call->out_dir_scb != call->out_scb) { + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; } - xdr_decode_YFSVolSync(&bp, NULL); + xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; } @@ -1294,8 +1183,8 @@ int yfs_fs_rename(struct afs_fs_cursor *fc, const char *orig_name, struct afs_vnode *new_dvnode, const char *new_name, - u64 current_orig_data_version, - u64 current_new_data_version) + struct afs_status_cb *orig_dvnode_scb, + struct afs_status_cb *new_dvnode_scb) { struct afs_vnode *orig_dvnode = fc->vnode; struct afs_call *call; @@ -1321,10 +1210,8 @@ int yfs_fs_rename(struct afs_fs_cursor *fc, return -ENOMEM; call->key = fc->key; - call->reply[0] = orig_dvnode; - call->reply[1] = new_dvnode; - call->expected_version = current_orig_data_version + 1; - call->expected_version_2 = current_new_data_version + 1; + call->out_dir_scb = orig_dvnode_scb; + call->out_scb = new_dvnode_scb; /* marshall the parameters */ bp = call->request; @@ -1338,46 +1225,18 @@ int yfs_fs_rename(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } /* - * Deliver reply data to a YFS.StoreData64 operation. - */ -static int yfs_deliver_fs_store_data(struct afs_call *call) -{ - struct afs_vnode *vnode = call->reply[0]; - const __be32 *bp; - int ret; - - _enter(""); - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); - if (ret < 0) - return ret; - xdr_decode_YFSVolSync(&bp, NULL); - - afs_pages_written_back(vnode, call); - - _leave(" = 0 [done]"); - return 0; -} - -/* * YFS.StoreData64 operation type. */ static const struct afs_call_type yfs_RXYFSStoreData64 = { .name = "YFS.StoreData64", .op = yfs_FS_StoreData64, - .deliver = yfs_deliver_fs_store_data, + .deliver = yfs_deliver_status_and_volsync, .destructor = afs_flat_call_destructor, }; @@ -1386,7 +1245,8 @@ static const struct afs_call_type yfs_RXYFSStoreData64 = { */ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) + unsigned offset, unsigned to, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1424,13 +1284,12 @@ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, call->key = fc->key; call->mapping = mapping; - call->reply[0] = vnode; call->first = first; call->last = last; call->first_offset = offset; call->last_to = to; call->send_pages = true; - call->expected_version = vnode->status.data_version + 1; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1445,51 +1304,25 @@ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } /* - * deliver reply data to an FS.StoreStatus - */ -static int yfs_deliver_fs_store_status(struct afs_call *call) -{ - struct afs_vnode *vnode = call->reply[0]; - const __be32 *bp; - int ret; - - _enter(""); - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); - if (ret < 0) - return ret; - xdr_decode_YFSVolSync(&bp, NULL); - - _leave(" = 0 [done]"); - return 0; -} - -/* * YFS.StoreStatus operation type */ static const struct afs_call_type yfs_RXYFSStoreStatus = { .name = "YFS.StoreStatus", .op = yfs_FS_StoreStatus, - .deliver = yfs_deliver_fs_store_status, + .deliver = yfs_deliver_status_and_volsync, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = { .name = "YFS.StoreData64", .op = yfs_FS_StoreData64, - .deliver = yfs_deliver_fs_store_status, + .deliver = yfs_deliver_status_and_volsync, .destructor = afs_flat_call_destructor, }; @@ -1497,7 +1330,8 @@ static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = { * Set the attributes on a file, using YFS.StoreData64 rather than * YFS.StoreStatus so as to alter the file size also. */ -static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) +static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1518,8 +1352,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->expected_version = vnode->status.data_version + 1; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1534,6 +1367,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1542,7 +1376,8 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) * Set the attributes on a file, using YFS.StoreData64 if there's a change in * file size, and YFS.StoreStatus otherwise. */ -int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) +int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1550,7 +1385,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) __be32 *bp; if (attr->ia_valid & ATTR_SIZE) - return yfs_fs_setattr_size(fc, attr); + return yfs_fs_setattr_size(fc, attr, scb); _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); @@ -1565,8 +1400,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->expected_version = vnode->status.data_version; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1578,6 +1412,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1607,7 +1442,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]); + xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus); call->unmarshall++; afs_extract_to_tmp(call); @@ -1623,7 +1458,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) return afs_protocol_error(call, -EBADMSG, afs_eproto_volname_len); size = (call->count + 3) & ~3; /* It's padded */ - afs_extract_begin(call, call->reply[2], size); + afs_extract_to_buf(call, size); call->unmarshall++; /* Fall through - and extract the volume name */ @@ -1633,7 +1468,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) if (ret < 0) return ret; - p = call->reply[2]; + p = call->buffer; p[call->count] = 0; _debug("volname '%s'", p); afs_extract_to_tmp(call); @@ -1651,7 +1486,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) return afs_protocol_error(call, -EBADMSG, afs_eproto_offline_msg_len); size = (call->count + 3) & ~3; /* It's padded */ - afs_extract_begin(call, call->reply[2], size); + afs_extract_to_buf(call, size); call->unmarshall++; /* Fall through - and extract the offline message */ @@ -1661,7 +1496,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) if (ret < 0) return ret; - p = call->reply[2]; + p = call->buffer; p[call->count] = 0; _debug("offline '%s'", p); @@ -1680,7 +1515,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) return afs_protocol_error(call, -EBADMSG, afs_eproto_motd_len); size = (call->count + 3) & ~3; /* It's padded */ - afs_extract_begin(call, call->reply[2], size); + afs_extract_to_buf(call, size); call->unmarshall++; /* Fall through - and extract the message of the day */ @@ -1690,7 +1525,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) if (ret < 0) return ret; - p = call->reply[2]; + p = call->buffer; p[call->count] = 0; _debug("motd '%s'", p); @@ -1706,23 +1541,13 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) } /* - * Destroy a YFS.GetVolumeStatus call. - */ -static void yfs_get_volume_status_call_destructor(struct afs_call *call) -{ - kfree(call->reply[2]); - call->reply[2] = NULL; - afs_flat_call_destructor(call); -} - -/* * YFS.GetVolumeStatus operation type */ static const struct afs_call_type yfs_RXYFSGetVolumeStatus = { .name = "YFS.GetVolumeStatus", .op = yfs_FS_GetVolumeStatus, .deliver = yfs_deliver_fs_get_volume_status, - .destructor = yfs_get_volume_status_call_destructor, + .destructor = afs_flat_call_destructor, }; /* @@ -1735,28 +1560,21 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc, struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; - void *tmpbuf; _enter(""); - tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL); - if (!tmpbuf) - return -ENOMEM; - call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_u64), - sizeof(struct yfs_xdr_YFSFetchVolumeStatus) + - sizeof(__be32)); - if (!call) { - kfree(tmpbuf); + max_t(size_t, + sizeof(struct yfs_xdr_YFSFetchVolumeStatus) + + sizeof(__be32), + AFSOPAQUEMAX + 1)); + if (!call) return -ENOMEM; - } call->key = fc->key; - call->reply[0] = vnode; - call->reply[1] = vs; - call->reply[2] = tmpbuf; + call->out_volstatus = vs; /* marshall the parameters */ bp = call->request; @@ -1767,39 +1585,12 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc, afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } /* - * Deliver reply data to operations that just return a file status and a volume - * sync record. - */ -static int yfs_deliver_status_and_volsync(struct afs_call *call) -{ - struct afs_vnode *vnode = call->reply[0]; - const __be32 *bp; - int ret; - - _enter("{%u}", call->unmarshall); - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); - if (ret < 0) - return ret; - xdr_decode_YFSVolSync(&bp, NULL); - - _leave(" = 0 [done]"); - return 0; -} - -/* * YFS.SetLock operation type */ static const struct afs_call_type yfs_RXYFSSetLock = { @@ -1834,7 +1625,8 @@ static const struct afs_call_type yfs_RXYFSReleaseLock = { /* * Set a lock on a file */ -int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) +int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1853,8 +1645,8 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->want_reply_time = true; + call->lvnode = vnode; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1866,6 +1658,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_calli(call, &vnode->fid, type); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1873,7 +1666,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) /* * extend a lock on a file */ -int yfs_fs_extend_lock(struct afs_fs_cursor *fc) +int yfs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1891,8 +1684,8 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; - call->want_reply_time = true; + call->lvnode = vnode; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1903,6 +1696,7 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -1910,7 +1704,7 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc) /* * release a lock on a file */ -int yfs_fs_release_lock(struct afs_fs_cursor *fc) +int yfs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -1928,7 +1722,8 @@ int yfs_fs_release_lock(struct afs_fs_cursor *fc) return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; + call->lvnode = vnode; + call->out_scb = scb; /* marshall the parameters */ bp = call->request; @@ -1939,48 +1734,18 @@ int yfs_fs_release_lock(struct afs_fs_cursor *fc) afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } /* - * Deliver reply data to an FS.FetchStatus with no vnode. - */ -static int yfs_deliver_fs_fetch_status(struct afs_call *call) -{ - struct afs_file_status *status = call->reply[1]; - struct afs_callback *callback = call->reply[2]; - struct afs_volsync *volsync = call->reply[3]; - struct afs_vnode *vnode = call->reply[0]; - const __be32 *bp; - int ret; - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); - - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - ret = yfs_decode_status(call, &bp, status, vnode, - &call->expected_version, NULL); - if (ret < 0) - return ret; - xdr_decode_YFSCallBack_raw(&bp, callback); - xdr_decode_YFSVolSync(&bp, volsync); - - _leave(" = 0 [done]"); - return 0; -} - -/* * YFS.FetchStatus operation type */ static const struct afs_call_type yfs_RXYFSFetchStatus = { .name = "YFS.FetchStatus", .op = yfs_FS_FetchStatus, - .deliver = yfs_deliver_fs_fetch_status, + .deliver = yfs_deliver_fs_status_cb_and_volsync, .destructor = afs_flat_call_destructor, }; @@ -1990,8 +1755,7 @@ static const struct afs_call_type yfs_RXYFSFetchStatus = { int yfs_fs_fetch_status(struct afs_fs_cursor *fc, struct afs_net *net, struct afs_fid *fid, - struct afs_file_status *status, - struct afs_callback *callback, + struct afs_status_cb *scb, struct afs_volsync *volsync) { struct afs_call *call; @@ -2012,11 +1776,8 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc, } call->key = fc->key; - call->reply[0] = NULL; /* vnode for fid[0] */ - call->reply[1] = status; - call->reply[2] = callback; - call->reply[3] = volsync; - call->expected_version = 1; /* vnode->status.data_version */ + call->out_scb = scb; + call->out_volsync = volsync; /* marshall the parameters */ bp = call->request; @@ -2025,9 +1786,9 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc, bp = xdr_encode_YFSFid(bp, fid); yfs_check_req(call, bp); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, fid); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2037,9 +1798,7 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) { - struct afs_file_status *statuses; - struct afs_callback *callbacks; - struct afs_vnode *vnode = call->reply[0]; + struct afs_status_cb *scb; const __be32 *bp; u32 tmp; int ret; @@ -2078,10 +1837,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - statuses = call->reply[1]; - ret = yfs_decode_status(call, &bp, &statuses[call->count], - call->count == 0 ? vnode : NULL, - NULL, NULL); + scb = &call->out_scb[call->count]; + ret = xdr_decode_YFSFetchStatus(&bp, call, scb); if (ret < 0) return ret; @@ -2120,13 +1877,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) _debug("unmarshall CB array"); bp = call->buffer; - callbacks = call->reply[2]; - xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]); - statuses = call->reply[1]; - if (call->count == 0 && vnode && statuses[0].abort_code == 0) { - bp = call->buffer; - xdr_decode_YFSCallBack(call, vnode, &bp); - } + scb = &call->out_scb[call->count]; + xdr_decode_YFSCallBack(&bp, call, scb); call->count++; if (call->count < call->count2) goto more_cbs; @@ -2141,7 +1893,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_YFSVolSync(&bp, call->reply[3]); + xdr_decode_YFSVolSync(&bp, call->out_volsync); call->unmarshall++; @@ -2170,8 +1922,7 @@ static const struct afs_call_type yfs_RXYFSInlineBulkStatus = { int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, struct afs_net *net, struct afs_fid *fids, - struct afs_file_status *statuses, - struct afs_callback *callbacks, + struct afs_status_cb *statuses, unsigned int nr_fids, struct afs_volsync *volsync) { @@ -2194,10 +1945,8 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, } call->key = fc->key; - call->reply[0] = NULL; /* vnode for fid[0] */ - call->reply[1] = statuses; - call->reply[2] = callbacks; - call->reply[3] = volsync; + call->out_scb = statuses; + call->out_volsync = volsync; call->count2 = nr_fids; /* marshall the parameters */ @@ -2209,9 +1958,9 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, bp = xdr_encode_YFSFid(bp, &fids[i]); yfs_check_req(call, bp); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &fids[0]); + afs_set_fc_call(call, fc); afs_make_call(&fc->ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, &fc->ac); } @@ -2221,9 +1970,7 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) { - struct afs_volsync *volsync = call->reply[2]; - struct afs_vnode *vnode = call->reply[1]; - struct yfs_acl *yacl = call->reply[0]; + struct yfs_acl *yacl = call->out_yacl; struct afs_acl *acl; const __be32 *bp; unsigned int size; @@ -2308,11 +2055,10 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) bp = call->buffer; yacl->inherit_flag = ntohl(*bp++); yacl->num_cleaned = ntohl(*bp++); - ret = yfs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL); + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); if (ret < 0) return ret; - xdr_decode_YFSVolSync(&bp, volsync); + xdr_decode_YFSVolSync(&bp, call->out_volsync); call->unmarshall++; @@ -2333,12 +2079,6 @@ void yfs_free_opaque_acl(struct yfs_acl *yacl) } } -static void yfs_destroy_fs_fetch_opaque_acl(struct afs_call *call) -{ - yfs_free_opaque_acl(call->reply[0]); - afs_flat_call_destructor(call); -} - /* * YFS.FetchOpaqueACL operation type */ @@ -2346,18 +2086,18 @@ static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = { .name = "YFS.FetchOpaqueACL", .op = yfs_FS_FetchOpaqueACL, .deliver = yfs_deliver_fs_fetch_opaque_acl, - .destructor = yfs_destroy_fs_fetch_opaque_acl, + .destructor = afs_flat_call_destructor, }; /* * Fetch the YFS advanced ACLs for a file. */ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc, - unsigned int flags) + struct yfs_acl *yacl, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; - struct yfs_acl *yacl; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -2370,19 +2110,15 @@ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); - if (!call) - goto nomem; - - yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL); - if (!yacl) - goto nomem_call; + if (!call) { + fc->ac.error = -ENOMEM; + return ERR_PTR(-ENOMEM); + } - yacl->flags = flags; call->key = fc->key; - call->reply[0] = yacl; - call->reply[1] = vnode; - call->reply[2] = NULL; /* volsync */ - call->ret_reply0 = true; + call->out_yacl = yacl; + call->out_scb = scb; + call->out_volsync = NULL; /* marshall the parameters */ bp = call->request; @@ -2391,17 +2127,10 @@ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc, bp = xdr_encode_YFSFid(bp, &vnode->fid); yfs_check_req(call, bp); - call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); afs_make_call(&fc->ac, call, GFP_KERNEL); return (struct yfs_acl *)afs_wait_for_call_to_complete(call, &fc->ac); - -nomem_call: - afs_put_call(call); -nomem: - fc->ac.error = -ENOMEM; - return ERR_PTR(-ENOMEM); } /* @@ -2417,7 +2146,8 @@ static const struct afs_call_type yfs_RXYFSStoreOpaqueACL2 = { /* * Fetch the YFS ACL for a file. */ -int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl) +int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl, + struct afs_status_cb *scb) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -2441,8 +2171,8 @@ int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl } call->key = fc->key; - call->reply[0] = vnode; - call->reply[2] = NULL; /* volsync */ + call->out_scb = scb; + call->out_volsync = NULL; /* marshall the parameters */ bp = call->request; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 36a8dc699448..72f8e1311392 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -892,8 +892,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) int have = ci->i_snap_caps; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask %p snap issued %s" - " (mask %s)\n", &ci->vfs_inode, + dout("__ceph_caps_issued_mask ino 0x%lx snap issued %s" + " (mask %s)\n", ci->vfs_inode.i_ino, ceph_cap_string(have), ceph_cap_string(mask)); return 1; @@ -904,8 +904,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) if (!__cap_is_valid(cap)) continue; if ((cap->issued & mask) == mask) { - dout("__ceph_caps_issued_mask %p cap %p issued %s" - " (mask %s)\n", &ci->vfs_inode, cap, + dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s" + " (mask %s)\n", ci->vfs_inode.i_ino, cap, ceph_cap_string(cap->issued), ceph_cap_string(mask)); if (touch) @@ -916,8 +916,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) /* does a combination of caps satisfy mask? */ have |= cap->issued; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask %p combo issued %s" - " (mask %s)\n", &ci->vfs_inode, + dout("__ceph_caps_issued_mask ino 0x%lx combo issued %s" + " (mask %s)\n", ci->vfs_inode.i_ino, ceph_cap_string(cap->issued), ceph_cap_string(mask)); if (touch) { @@ -2257,8 +2257,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (datasync) goto out; - inode_lock(inode); - dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); @@ -2273,7 +2271,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); } - inode_unlock(inode); out: dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); return ret; @@ -2528,9 +2525,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got, * to (when applicable), and check against max_size here as well. * Note that caller is responsible for ensuring max_size increases are * requested from the MDS. + * + * Returns 0 if caps were not able to be acquired (yet), a 1 if they were, + * or a negative error code. + * + * FIXME: how does a 0 return differ from -EAGAIN? */ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - loff_t endoff, bool nonblock, int *got, int *err) + loff_t endoff, bool nonblock, int *got) { struct inode *inode = &ci->vfs_inode; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; @@ -2550,8 +2552,7 @@ again: if ((file_wanted & need) != need) { dout("try_get_cap_refs need %s file_wanted %s, EBADF\n", ceph_cap_string(need), ceph_cap_string(file_wanted)); - *err = -EBADF; - ret = 1; + ret = -EBADF; goto out_unlock; } @@ -2572,10 +2573,8 @@ again: if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { dout("get_cap_refs %p endoff %llu > maxsize %llu\n", inode, endoff, ci->i_max_size); - if (endoff > ci->i_requested_max_size) { - *err = -EAGAIN; - ret = 1; - } + if (endoff > ci->i_requested_max_size) + ret = -EAGAIN; goto out_unlock; } /* @@ -2610,8 +2609,7 @@ again: * task isn't in TASK_RUNNING state */ if (nonblock) { - *err = -EAGAIN; - ret = 1; + ret = -EAGAIN; goto out_unlock; } @@ -2640,8 +2638,7 @@ again: if (session_readonly) { dout("get_cap_refs %p needed %s but mds%d readonly\n", inode, ceph_cap_string(need), ci->i_auth_cap->mds); - *err = -EROFS; - ret = 1; + ret = -EROFS; goto out_unlock; } @@ -2650,16 +2647,14 @@ again: if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout("get_cap_refs %p forced umount\n", inode); - *err = -EIO; - ret = 1; + ret = -EIO; goto out_unlock; } mds_wanted = __ceph_caps_mds_wanted(ci, false); if (need & ~(mds_wanted & need)) { dout("get_cap_refs %p caps were dropped" " (session killed?)\n", inode); - *err = -ESTALE; - ret = 1; + ret = -ESTALE; goto out_unlock; } if (!(file_wanted & ~mds_wanted)) @@ -2710,7 +2705,7 @@ static void check_max_size(struct inode *inode, loff_t endoff) int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, bool nonblock, int *got) { - int ret, err = 0; + int ret; BUG_ON(need & ~CEPH_CAP_FILE_RD); BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED)); @@ -2718,15 +2713,8 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, if (ret < 0) return ret; - ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err); - if (ret) { - if (err == -EAGAIN) { - ret = 0; - } else if (err < 0) { - ret = err; - } - } - return ret; + ret = try_get_cap_refs(ci, need, want, 0, nonblock, got); + return ret == -EAGAIN ? 0 : ret; } /* @@ -2737,7 +2725,7 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, loff_t endoff, int *got, struct page **pinned_page) { - int _got, ret, err = 0; + int _got, ret; ret = ceph_pool_perm_check(ci, need); if (ret < 0) @@ -2747,21 +2735,19 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, if (endoff > 0) check_max_size(&ci->vfs_inode, endoff); - err = 0; _got = 0; ret = try_get_cap_refs(ci, need, want, endoff, - false, &_got, &err); - if (ret) { - if (err == -EAGAIN) - continue; - if (err < 0) - ret = err; - } else { + false, &_got); + if (ret == -EAGAIN) { + continue; + } else if (!ret) { + int err; + DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(&ci->i_cap_wq, &wait); - while (!try_get_cap_refs(ci, need, want, endoff, - true, &_got, &err)) { + while (!(err = try_get_cap_refs(ci, need, want, endoff, + true, &_got))) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -2770,19 +2756,14 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, } remove_wait_queue(&ci->i_cap_wq, &wait); - if (err == -EAGAIN) continue; - if (err < 0) - ret = err; } - if (ret < 0) { - if (err == -ESTALE) { - /* session was killed, try renew caps */ - ret = ceph_renew_caps(&ci->vfs_inode); - if (ret == 0) - continue; - } + if (ret == -ESTALE) { + /* session was killed, try renew caps */ + ret = ceph_renew_caps(&ci->vfs_inode); + if (ret == 0) + continue; return ret; } @@ -4099,7 +4080,7 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode) } /* - * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it + * For a soon-to-be unlinked file, drop the LINK caps. If it * looks like the link count will hit 0, drop any other caps (other * than PIN) we don't specifically want (due to the file still being * open). diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 98365e74cb4a..b3fc5fe26a1a 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -37,7 +37,7 @@ static int mdsmap_show(struct seq_file *s, void *p) struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; int state = mdsmap->m_info[i].state; seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, - ceph_pr_addr(&addr->in_addr), + ceph_pr_addr(addr), ceph_mds_state_name(state)); } return 0; @@ -88,7 +88,7 @@ static int mdsc_show(struct seq_file *s, void *p) req->r_dentry, path ? path : ""); spin_unlock(&req->r_dentry->d_lock); - kfree(path); + ceph_mdsc_free_path(path, pathlen); } else if (req->r_path1) { seq_printf(s, " #%llx/%s", req->r_ino1.ino, req->r_path1); @@ -108,7 +108,7 @@ static int mdsc_show(struct seq_file *s, void *p) req->r_old_dentry, path ? path : ""); spin_unlock(&req->r_old_dentry->d_lock); - kfree(path); + ceph_mdsc_free_path(path, pathlen); } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) { if (req->r_ino2.ino) seq_printf(s, " #%llx/%s", req->r_ino2.ino, @@ -124,18 +124,48 @@ static int mdsc_show(struct seq_file *s, void *p) return 0; } +static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p) +{ + struct seq_file *s = p; + + seq_printf(s, "0x%-17lx%-17s%-17s\n", inode->i_ino, + ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented)); + return 0; +} + static int caps_show(struct seq_file *s, void *p) { struct ceph_fs_client *fsc = s->private; - int total, avail, used, reserved, min; + struct ceph_mds_client *mdsc = fsc->mdsc; + int total, avail, used, reserved, min, i; ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); seq_printf(s, "total\t\t%d\n" "avail\t\t%d\n" "used\t\t%d\n" "reserved\t%d\n" - "min\t%d\n", + "min\t\t%d\n\n", total, avail, used, reserved, min); + seq_printf(s, "ino issued implemented\n"); + seq_printf(s, "-----------------------------------------------\n"); + + mutex_lock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) { + struct ceph_mds_session *session; + + session = __ceph_lookup_mds_session(mdsc, i); + if (!session) + continue; + mutex_unlock(&mdsc->mutex); + mutex_lock(&session->s_mutex); + ceph_iterate_session_caps(session, caps_show_cb, s); + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); + mutex_lock(&mdsc->mutex); + } + mutex_unlock(&mdsc->mutex); + return 0; } diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 3c59ad180ef0..d3ef7ee429ec 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -22,18 +22,77 @@ struct ceph_nfs_confh { u64 ino, parent_ino; } __attribute__ ((packed)); +/* + * fh for snapped inode + */ +struct ceph_nfs_snapfh { + u64 ino; + u64 snapid; + u64 parent_ino; + u32 hash; +} __attribute__ ((packed)); + +static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, + struct inode *parent_inode) +{ + const static int snap_handle_length = + sizeof(struct ceph_nfs_snapfh) >> 2; + struct ceph_nfs_snapfh *sfh = (void *)rawfh; + u64 snapid = ceph_snap(inode); + int ret; + bool no_parent = true; + + if (*max_len < snap_handle_length) { + *max_len = snap_handle_length; + ret = FILEID_INVALID; + goto out; + } + + ret = -EINVAL; + if (snapid != CEPH_SNAPDIR) { + struct inode *dir; + struct dentry *dentry = d_find_alias(inode); + if (!dentry) + goto out; + + rcu_read_lock(); + dir = d_inode_rcu(dentry->d_parent); + if (ceph_snap(dir) != CEPH_SNAPDIR) { + sfh->parent_ino = ceph_ino(dir); + sfh->hash = ceph_dentry_hash(dir, dentry); + no_parent = false; + } + rcu_read_unlock(); + dput(dentry); + } + + if (no_parent) { + if (!S_ISDIR(inode->i_mode)) + goto out; + sfh->parent_ino = sfh->ino; + sfh->hash = 0; + } + sfh->ino = ceph_ino(inode); + sfh->snapid = snapid; + + *max_len = snap_handle_length; + ret = FILEID_BTRFS_WITH_PARENT; +out: + dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret); + return ret; +} + static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + const static int handle_length = + sizeof(struct ceph_nfs_fh) >> 2; + const static int connected_handle_length = + sizeof(struct ceph_nfs_confh) >> 2; int type; - struct ceph_nfs_fh *fh = (void *)rawfh; - struct ceph_nfs_confh *cfh = (void *)rawfh; - int connected_handle_length = sizeof(*cfh)/4; - int handle_length = sizeof(*fh)/4; - /* don't re-export snaps */ if (ceph_snap(inode) != CEPH_NOSNAP) - return -EINVAL; + return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode); if (parent_inode && (*max_len < connected_handle_length)) { *max_len = connected_handle_length; @@ -44,6 +103,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, } if (parent_inode) { + struct ceph_nfs_confh *cfh = (void *)rawfh; dout("encode_fh %llx with parent %llx\n", ceph_ino(inode), ceph_ino(parent_inode)); cfh->ino = ceph_ino(inode); @@ -51,6 +111,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = connected_handle_length; type = FILEID_INO32_GEN_PARENT; } else { + struct ceph_nfs_fh *fh = (void *)rawfh; dout("encode_fh %llx\n", ceph_ino(inode)); fh->ino = ceph_ino(inode); *max_len = handle_length; @@ -59,7 +120,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, return type; } -static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) +static struct inode *__lookup_inode(struct super_block *sb, u64 ino) { struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; @@ -81,7 +142,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) mask = CEPH_STAT_CAP_INODE; if (ceph_security_xattr_wanted(d_inode(sb->s_root))) mask |= CEPH_CAP_XATTR_SHARED; - req->r_args.getattr.mask = cpu_to_le32(mask); + req->r_args.lookupino.mask = cpu_to_le32(mask); req->r_ino1 = vino; req->r_num_caps = 1; @@ -91,16 +152,114 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) ihold(inode); ceph_mdsc_put_request(req); if (!inode) - return ERR_PTR(-ESTALE); - if (inode->i_nlink == 0) { - iput(inode); - return ERR_PTR(-ESTALE); - } + return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE); } + return inode; +} + +struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino) +{ + struct inode *inode = __lookup_inode(sb, ino); + if (IS_ERR(inode)) + return inode; + if (inode->i_nlink == 0) { + iput(inode); + return ERR_PTR(-ESTALE); + } + return inode; +} +static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) +{ + struct inode *inode = __lookup_inode(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (inode->i_nlink == 0) { + iput(inode); + return ERR_PTR(-ESTALE); + } return d_obtain_alias(inode); } +static struct dentry *__snapfh_to_dentry(struct super_block *sb, + struct ceph_nfs_snapfh *sfh, + bool want_parent) +{ + struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_request *req; + struct inode *inode; + struct ceph_vino vino; + int mask; + int err; + bool unlinked = false; + + if (want_parent) { + vino.ino = sfh->parent_ino; + if (sfh->snapid == CEPH_SNAPDIR) + vino.snap = CEPH_NOSNAP; + else if (sfh->ino == sfh->parent_ino) + vino.snap = CEPH_SNAPDIR; + else + vino.snap = sfh->snapid; + } else { + vino.ino = sfh->ino; + vino.snap = sfh->snapid; + } + inode = ceph_find_inode(sb, vino); + if (inode) + return d_obtain_alias(inode); + + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, + USE_ANY_MDS); + if (IS_ERR(req)) + return ERR_CAST(req); + + mask = CEPH_STAT_CAP_INODE; + if (ceph_security_xattr_wanted(d_inode(sb->s_root))) + mask |= CEPH_CAP_XATTR_SHARED; + req->r_args.lookupino.mask = cpu_to_le32(mask); + if (vino.snap < CEPH_NOSNAP) { + req->r_args.lookupino.snapid = cpu_to_le64(vino.snap); + if (!want_parent && sfh->ino != sfh->parent_ino) { + req->r_args.lookupino.parent = + cpu_to_le64(sfh->parent_ino); + req->r_args.lookupino.hash = + cpu_to_le32(sfh->hash); + } + } + + req->r_ino1 = vino; + req->r_num_caps = 1; + err = ceph_mdsc_do_request(mdsc, NULL, req); + inode = req->r_target_inode; + if (inode) { + if (vino.snap == CEPH_SNAPDIR) { + if (inode->i_nlink == 0) + unlinked = true; + inode = ceph_get_snapdir(inode); + } else if (ceph_snap(inode) == vino.snap) { + ihold(inode); + } else { + /* mds does not support lookup snapped inode */ + err = -EOPNOTSUPP; + inode = NULL; + } + } + ceph_mdsc_put_request(req); + + if (want_parent) { + dout("snapfh_to_parent %llx.%llx\n err=%d\n", + vino.ino, vino.snap, err); + } else { + dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d", + vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err); + } + if (!inode) + return ERR_PTR(-ESTALE); + /* see comments in ceph_get_parent() */ + return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode); +} + /* * convert regular fh to dentry */ @@ -110,6 +269,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, { struct ceph_nfs_fh *fh = (void *)fid->raw; + if (fh_type == FILEID_BTRFS_WITH_PARENT) { + struct ceph_nfs_snapfh *sfh = (void *)fid->raw; + return __snapfh_to_dentry(sb, sfh, false); + } + if (fh_type != FILEID_INO32_GEN && fh_type != FILEID_INO32_GEN_PARENT) return NULL; @@ -163,13 +327,49 @@ static struct dentry *__get_parent(struct super_block *sb, static struct dentry *ceph_get_parent(struct dentry *child) { - /* don't re-export snaps */ - if (ceph_snap(d_inode(child)) != CEPH_NOSNAP) - return ERR_PTR(-EINVAL); - - dout("get_parent %p ino %llx.%llx\n", - child, ceph_vinop(d_inode(child))); - return __get_parent(child->d_sb, child, 0); + struct inode *inode = d_inode(child); + struct dentry *dn; + + if (ceph_snap(inode) != CEPH_NOSNAP) { + struct inode* dir; + bool unlinked = false; + /* do not support non-directory */ + if (!d_is_dir(child)) { + dn = ERR_PTR(-EINVAL); + goto out; + } + dir = __lookup_inode(inode->i_sb, ceph_ino(inode)); + if (IS_ERR(dir)) { + dn = ERR_CAST(dir); + goto out; + } + /* There can be multiple paths to access snapped inode. + * For simplicity, treat snapdir of head inode as parent */ + if (ceph_snap(inode) != CEPH_SNAPDIR) { + struct inode *snapdir = ceph_get_snapdir(dir); + if (dir->i_nlink == 0) + unlinked = true; + iput(dir); + if (IS_ERR(snapdir)) { + dn = ERR_CAST(snapdir); + goto out; + } + dir = snapdir; + } + /* If directory has already been deleted, futher get_parent + * will fail. Do not mark snapdir dentry as disconnected, + * this prevent exportfs from doing futher get_parent. */ + if (unlinked) + dn = d_obtain_root(dir); + else + dn = d_obtain_alias(dir); + } else { + dn = __get_parent(child->d_sb, child, 0); + } +out: + dout("get_parent %p ino %llx.%llx err=%ld\n", + child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0)); + return dn; } /* @@ -182,6 +382,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, struct ceph_nfs_confh *cfh = (void *)fid->raw; struct dentry *dentry; + if (fh_type == FILEID_BTRFS_WITH_PARENT) { + struct ceph_nfs_snapfh *sfh = (void *)fid->raw; + return __snapfh_to_dentry(sb, sfh, true); + } + if (fh_type != FILEID_INO32_GEN_PARENT) return NULL; if (fh_len < sizeof(*cfh) / 4) @@ -194,14 +399,115 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, return dentry; } +static int __get_snap_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct inode *inode = d_inode(child); + struct inode *dir = d_inode(parent); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_mds_request *req = NULL; + char *last_name = NULL; + unsigned next_offset = 2; + int err = -EINVAL; + + if (ceph_ino(inode) != ceph_ino(dir)) + goto out; + if (ceph_snap(inode) == CEPH_SNAPDIR) { + if (ceph_snap(dir) == CEPH_NOSNAP) { + strcpy(name, fsc->mount_options->snapdir_name); + err = 0; + } + goto out; + } + if (ceph_snap(dir) != CEPH_SNAPDIR) + goto out; + + while (1) { + struct ceph_mds_reply_info_parsed *rinfo; + struct ceph_mds_reply_dir_entry *rde; + int i; + + req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP, + USE_AUTH_MDS); + if (IS_ERR(req)) { + err = PTR_ERR(req); + req = NULL; + goto out; + } + err = ceph_alloc_readdir_reply_buffer(req, inode); + if (err) + goto out; + + req->r_direct_mode = USE_AUTH_MDS; + req->r_readdir_offset = next_offset; + req->r_args.readdir.flags = + cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS); + if (last_name) { + req->r_path2 = last_name; + last_name = NULL; + } + + req->r_inode = dir; + ihold(dir); + req->r_dentry = dget(parent); + + inode_lock(dir); + err = ceph_mdsc_do_request(fsc->mdsc, NULL, req); + inode_unlock(dir); + + if (err < 0) + goto out; + + rinfo = &req->r_reply_info; + for (i = 0; i < rinfo->dir_nr; i++) { + rde = rinfo->dir_entries + i; + BUG_ON(!rde->inode.in); + if (ceph_snap(inode) == + le64_to_cpu(rde->inode.in->snapid)) { + memcpy(name, rde->name, rde->name_len); + name[rde->name_len] = '\0'; + err = 0; + goto out; + } + } + + if (rinfo->dir_end) + break; + + BUG_ON(rinfo->dir_nr <= 0); + rde = rinfo->dir_entries + (rinfo->dir_nr - 1); + next_offset += rinfo->dir_nr; + last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL); + if (!last_name) { + err = -ENOMEM; + goto out; + } + + ceph_mdsc_put_request(req); + req = NULL; + } + err = -ENOENT; +out: + if (req) + ceph_mdsc_put_request(req); + kfree(last_name); + dout("get_snap_name %p ino %llx.%llx err=%d\n", + child, ceph_vinop(inode), err); + return err; +} + static int ceph_get_name(struct dentry *parent, char *name, struct dentry *child) { struct ceph_mds_client *mdsc; struct ceph_mds_request *req; + struct inode *inode = d_inode(child); int err; - mdsc = ceph_inode_to_client(d_inode(child))->mdsc; + if (ceph_snap(inode) != CEPH_NOSNAP) + return __get_snap_name(parent, name, child); + + mdsc = ceph_inode_to_client(inode)->mdsc; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, USE_ANY_MDS); if (IS_ERR(req)) @@ -209,8 +515,8 @@ static int ceph_get_name(struct dentry *parent, char *name, inode_lock(d_inode(parent)); - req->r_inode = d_inode(child); - ihold(d_inode(child)); + req->r_inode = inode; + ihold(inode); req->r_ino2 = ceph_vino(d_inode(parent)); req->r_parent = d_inode(parent); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); @@ -224,10 +530,10 @@ static int ceph_get_name(struct dentry *parent, char *name, memcpy(name, rinfo->dname, rinfo->dname_len); name[rinfo->dname_len] = 0; dout("get_name %p ino %llx.%llx name %s\n", - child, ceph_vinop(d_inode(child)), name); + child, ceph_vinop(inode), name); } else { dout("get_name %p ino %llx.%llx err %d\n", - child, ceph_vinop(d_inode(child)), err); + child, ceph_vinop(inode), err); } ceph_mdsc_put_request(req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 84725b53ac21..305daf043eb0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -929,7 +929,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", (write ? "write" : "read"), file, pos, (unsigned)count, - snapc, snapc->seq); + snapc, snapc ? snapc->seq : 0); ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count - 1); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 35dae6d5493a..f85355bf49c4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2266,43 +2266,72 @@ int ceph_permission(struct inode *inode, int mask) return err; } +/* Craft a mask of needed caps given a set of requested statx attrs. */ +static int statx_to_caps(u32 want) +{ + int mask = 0; + + if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME)) + mask |= CEPH_CAP_AUTH_SHARED; + + if (want & (STATX_NLINK|STATX_CTIME)) + mask |= CEPH_CAP_LINK_SHARED; + + if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| + STATX_BLOCKS)) + mask |= CEPH_CAP_FILE_SHARED; + + if (want & (STATX_CTIME)) + mask |= CEPH_CAP_XATTR_SHARED; + + return mask; +} + /* - * Get all attributes. Hopefully somedata we'll have a statlite() - * and can limit the fields we require to be accurate. + * Get all the attributes. If we have sufficient caps for the requested attrs, + * then we can avoid talking to the MDS at all. */ int ceph_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct inode *inode = d_inode(path->dentry); struct ceph_inode_info *ci = ceph_inode(inode); - int err; + int err = 0; - err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false); - if (!err) { - generic_fillattr(inode, stat); - stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); - if (ceph_snap(inode) == CEPH_NOSNAP) - stat->dev = inode->i_sb->s_dev; + /* Skip the getattr altogether if we're asked not to sync */ + if (!(flags & AT_STATX_DONT_SYNC)) { + err = ceph_do_getattr(inode, statx_to_caps(request_mask), + flags & AT_STATX_FORCE_SYNC); + if (err) + return err; + } + + generic_fillattr(inode, stat); + stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); + if (ceph_snap(inode) == CEPH_NOSNAP) + stat->dev = inode->i_sb->s_dev; + else + stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; + + if (S_ISDIR(inode->i_mode)) { + if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), + RBYTES)) + stat->size = ci->i_rbytes; else - stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; - - if (S_ISDIR(inode->i_mode)) { - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), - RBYTES)) - stat->size = ci->i_rbytes; - else - stat->size = ci->i_files + ci->i_subdirs; - stat->blocks = 0; - stat->blksize = 65536; - /* - * Some applications rely on the number of st_nlink - * value on directories to be either 0 (if unlinked) - * or 2 + number of subdirectories. - */ - if (stat->nlink == 1) - /* '.' + '..' + subdirs */ - stat->nlink = 1 + 1 + ci->i_subdirs; - } + stat->size = ci->i_files + ci->i_subdirs; + stat->blocks = 0; + stat->blksize = 65536; + /* + * Some applications rely on the number of st_nlink + * value on directories to be either 0 (if unlinked) + * or 2 + number of subdirectories. + */ + if (stat->nlink == 1) + /* '.' + '..' + subdirs */ + stat->nlink = 1 + 1 + ci->i_subdirs; } + + /* Mask off any higher bits (e.g. btime) until we have support */ + stat->result_mask = request_mask & STATX_BASIC_STATS; return err; } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 9dae2ec7e1fa..ac9b53b89365 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -237,15 +237,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { err = -EIO; - } else if (op == CEPH_MDS_OP_SETFILELOCK) { - /* - * increasing i_filelock_ref closes race window between - * handling request reply and adding file_lock struct to - * inode. Otherwise, i_auth_cap may get trimmed in the - * window. Caller function will decrease the counter. - */ - fl->fl_ops = &ceph_fl_lock_ops; - atomic_inc(&ci->i_filelock_ref); } spin_unlock(&ci->i_ceph_lock); if (err < 0) { @@ -299,10 +290,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { err = -EIO; - } else { - /* see comment in ceph_lock */ - fl->fl_ops = &ceph_fl_lock_ops; - atomic_inc(&ci->i_filelock_ref); } spin_unlock(&ci->i_ceph_lock); if (err < 0) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9049c2a3e972..959b1bf7c327 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -550,15 +550,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, int mds) { - struct ceph_mds_session *session; - if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) return NULL; - session = mdsc->sessions[mds]; - dout("lookup_mds_session %p %d\n", session, - refcount_read(&session->s_ref)); - get_session(session); - return session; + return get_session(mdsc->sessions[mds]); } static bool __have_session(struct ceph_mds_client *mdsc, int mds) @@ -1284,9 +1278,9 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, * * Caller must hold session s_mutex. */ -static int iterate_session_caps(struct ceph_mds_session *session, - int (*cb)(struct inode *, struct ceph_cap *, - void *), void *arg) +int ceph_iterate_session_caps(struct ceph_mds_session *session, + int (*cb)(struct inode *, struct ceph_cap *, + void *), void *arg) { struct list_head *p; struct ceph_cap *cap; @@ -1451,7 +1445,7 @@ static void remove_session_caps(struct ceph_mds_session *session) LIST_HEAD(dispose); dout("remove_session_caps on %p\n", session); - iterate_session_caps(session, remove_session_caps_cb, fsc); + ceph_iterate_session_caps(session, remove_session_caps_cb, fsc); wake_up_all(&fsc->mdsc->cap_flushing_wq); @@ -1534,8 +1528,8 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, static void wake_up_session_caps(struct ceph_mds_session *session, int ev) { dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); - iterate_session_caps(session, wake_up_session_cb, - (void *)(unsigned long)ev); + ceph_iterate_session_caps(session, wake_up_session_cb, + (void *)(unsigned long)ev); } /* @@ -1768,7 +1762,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, session->s_mds, session->s_nr_caps, max_caps, trim_caps); if (trim_caps > 0) { session->s_trim_caps = trim_caps; - iterate_session_caps(session, trim_caps_cb, session); + ceph_iterate_session_caps(session, trim_caps_cb, session); dout("trim_caps mds%d done: %d / %d, trimmed %d\n", session->s_mds, session->s_nr_caps, max_caps, trim_caps - session->s_trim_caps); @@ -1861,7 +1855,8 @@ again: num_cap_releases--; head = msg->front.iov_base; - le32_add_cpu(&head->num, 1); + put_unaligned_le32(get_unaligned_le32(&head->num) + 1, + &head->num); item = msg->front.iov_base + msg->front.iov_len; item->ino = cpu_to_le64(cap->cap_ino); item->cap_id = cpu_to_le64(cap->cap_id); @@ -2089,43 +2084,29 @@ static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) * Encode hidden .snap dirs as a double /, i.e. * foo/.snap/bar -> foo//bar */ -char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, +char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, int stop_on_nosnap) { struct dentry *temp; char *path; - int len, pos; + int pos; unsigned seq; + u64 base; if (!dentry) return ERR_PTR(-EINVAL); -retry: - len = 0; - seq = read_seqbegin(&rename_lock); - rcu_read_lock(); - for (temp = dentry; !IS_ROOT(temp);) { - struct inode *inode = d_inode(temp); - if (inode && ceph_snap(inode) == CEPH_SNAPDIR) - len++; /* slash only */ - else if (stop_on_nosnap && inode && - ceph_snap(inode) == CEPH_NOSNAP) - break; - else - len += 1 + temp->d_name.len; - temp = temp->d_parent; - } - rcu_read_unlock(); - if (len) - len--; /* no leading '/' */ - - path = kmalloc(len+1, GFP_NOFS); + path = __getname(); if (!path) return ERR_PTR(-ENOMEM); - pos = len; - path[pos] = 0; /* trailing null */ +retry: + pos = PATH_MAX - 1; + path[pos] = '\0'; + + seq = read_seqbegin(&rename_lock); rcu_read_lock(); - for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) { + temp = dentry; + for (;;) { struct inode *inode; spin_lock(&temp->d_lock); @@ -2143,83 +2124,54 @@ retry: spin_unlock(&temp->d_lock); break; } - strncpy(path + pos, temp->d_name.name, - temp->d_name.len); + memcpy(path + pos, temp->d_name.name, temp->d_name.len); } spin_unlock(&temp->d_lock); - if (pos) - path[--pos] = '/'; temp = temp->d_parent; + + /* Are we at the root? */ + if (IS_ROOT(temp)) + break; + + /* Are we out of buffer? */ + if (--pos < 0) + break; + + path[pos] = '/'; } + base = ceph_ino(d_inode(temp)); rcu_read_unlock(); - if (pos != 0 || read_seqretry(&rename_lock, seq)) { + if (pos < 0 || read_seqretry(&rename_lock, seq)) { pr_err("build_path did not end path lookup where " - "expected, namelen is %d, pos is %d\n", len, pos); + "expected, pos is %d\n", pos); /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ - kfree(path); goto retry; } - *base = ceph_ino(d_inode(temp)); - *plen = len; + *pbase = base; + *plen = PATH_MAX - 1 - pos; dout("build_path on %p %d built %llx '%.*s'\n", - dentry, d_count(dentry), *base, len, path); - return path; -} - -/* Duplicate the dentry->d_name.name safely */ -static int clone_dentry_name(struct dentry *dentry, const char **ppath, - int *ppathlen) -{ - u32 len; - char *name; - -retry: - len = READ_ONCE(dentry->d_name.len); - name = kmalloc(len + 1, GFP_NOFS); - if (!name) - return -ENOMEM; - - spin_lock(&dentry->d_lock); - if (dentry->d_name.len != len) { - spin_unlock(&dentry->d_lock); - kfree(name); - goto retry; - } - memcpy(name, dentry->d_name.name, len); - spin_unlock(&dentry->d_lock); - - name[len] = '\0'; - *ppath = name; - *ppathlen = len; - return 0; + dentry, d_count(dentry), base, *plen, path + pos); + return path + pos; } static int build_dentry_path(struct dentry *dentry, struct inode *dir, const char **ppath, int *ppathlen, u64 *pino, bool *pfreepath, bool parent_locked) { - int ret; char *path; rcu_read_lock(); if (!dir) dir = d_inode_rcu(dentry->d_parent); - if (dir && ceph_snap(dir) == CEPH_NOSNAP) { + if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) { *pino = ceph_ino(dir); rcu_read_unlock(); - if (parent_locked) { - *ppath = dentry->d_name.name; - *ppathlen = dentry->d_name.len; - } else { - ret = clone_dentry_name(dentry, ppath, ppathlen); - if (ret) - return ret; - *pfreepath = true; - } + *ppath = dentry->d_name.name; + *ppathlen = dentry->d_name.len; return 0; } rcu_read_unlock(); @@ -2331,9 +2283,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, (!!req->r_inode_drop + !!req->r_dentry_drop + !!req->r_old_inode_drop + !!req->r_old_dentry_drop); if (req->r_dentry_drop) - len += req->r_dentry->d_name.len; + len += pathlen1; if (req->r_old_dentry_drop) - len += req->r_old_dentry->d_name.len; + len += pathlen2; msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); if (!msg) { @@ -2410,10 +2362,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, out_free2: if (freepath2) - kfree((char *)path2); + ceph_mdsc_free_path((char *)path2, pathlen2); out_free1: if (freepath1) - kfree((char *)path1); + ceph_mdsc_free_path((char *)path1, pathlen1); out: return msg; } @@ -2427,8 +2379,7 @@ static void complete_request(struct ceph_mds_client *mdsc, { if (req->r_callback) req->r_callback(mdsc, req); - else - complete_all(&req->r_completion); + complete_all(&req->r_completion); } /* @@ -2670,28 +2621,11 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) } } -void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, +int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req) { - dout("submit_request on %p\n", req); - mutex_lock(&mdsc->mutex); - __register_request(mdsc, req, NULL); - __do_request(mdsc, req); - mutex_unlock(&mdsc->mutex); -} - -/* - * Synchrously perform an mds request. Take care of all of the - * session setup, forwarding, retry details. - */ -int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, - struct inode *dir, - struct ceph_mds_request *req) -{ int err; - dout("do_request on %p\n", req); - /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ if (req->r_inode) ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); @@ -2701,18 +2635,21 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), CEPH_CAP_PIN); - /* issue */ + dout("submit_request on %p for inode %p\n", req, dir); mutex_lock(&mdsc->mutex); __register_request(mdsc, req, dir); __do_request(mdsc, req); + err = req->r_err; + mutex_unlock(&mdsc->mutex); + return err; +} - if (req->r_err) { - err = req->r_err; - goto out; - } +static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req) +{ + int err; /* wait */ - mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); if (!req->r_timeout && req->r_wait_for_completion) { err = req->r_wait_for_completion(mdsc, req); @@ -2753,8 +2690,26 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, err = req->r_err; } -out: mutex_unlock(&mdsc->mutex); + return err; +} + +/* + * Synchrously perform an mds request. Take care of all of the + * session setup, forwarding, retry details. + */ +int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, + struct inode *dir, + struct ceph_mds_request *req) +{ + int err; + + dout("do_request on %p\n", req); + + /* issue */ + err = ceph_mdsc_submit_request(mdsc, dir, req); + if (!err) + err = ceph_mdsc_wait_request(mdsc, req); dout("do_request %p done, result %d\n", req, err); return err; } @@ -3485,7 +3440,7 @@ out_freeflocks: ceph_pagelist_encode_string(pagelist, path, pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); out_freepath: - kfree(path); + ceph_mdsc_free_path(path, pathlen); } out_err: @@ -3642,7 +3597,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, recon_state.msg_version = 2; } /* trsaverse this session's caps */ - err = iterate_session_caps(session, encode_caps_cb, &recon_state); + err = ceph_iterate_session_caps(session, encode_caps_cb, &recon_state); spin_lock(&session->s_cap_lock); session->s_cap_reconnect = 0; @@ -4125,6 +4080,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) mdsc->max_sessions = 0; mdsc->stopping = 0; atomic64_set(&mdsc->quotarealms_count, 0); + mdsc->quotarealms_inodes = RB_ROOT; + mutex_init(&mdsc->quotarealms_inodes_mutex); mdsc->last_snap_seq = 0; init_rwsem(&mdsc->snap_rwsem); mdsc->snap_realms = RB_ROOT; @@ -4216,6 +4173,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) * their inode/dcache refs */ ceph_msgr_flush(); + + ceph_cleanup_quotarealms_inodes(mdsc); } /* diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 50385a481fdb..a83f28bc2387 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -326,6 +326,18 @@ struct ceph_snapid_map { }; /* + * node for list of quotarealm inodes that are not visible from the filesystem + * mountpoint, but required to handle, e.g. quotas. + */ +struct ceph_quotarealm_inode { + struct rb_node node; + u64 ino; + unsigned long timeout; /* last time a lookup failed for this inode */ + struct mutex mutex; + struct inode *inode; +}; + +/* * mds client state */ struct ceph_mds_client { @@ -344,6 +356,12 @@ struct ceph_mds_client { int stopping; /* true if shutting down */ atomic64_t quotarealms_count; /* # realms with quota */ + /* + * We keep a list of inodes we don't see in the mountpoint but that we + * need to track quota realms. + */ + struct rb_root quotarealms_inodes; + struct mutex quotarealms_inodes_mutex; /* * snap_rwsem will cover cap linkage into snaprealms, and @@ -447,8 +465,9 @@ extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, struct inode *dir); extern struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); -extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, - struct ceph_mds_request *req); +extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, + struct inode *dir, + struct ceph_mds_request *req); extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req); @@ -468,8 +487,18 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); +extern int ceph_iterate_session_caps(struct ceph_mds_session *session, + int (*cb)(struct inode *, + struct ceph_cap *, void *), + void *arg); extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); +static inline void ceph_mdsc_free_path(char *path, int len) +{ + if (path) + __putname(path - (PATH_MAX - 1 - len)); +} + extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, int stop_on_nosnap); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 1a2c5d390f7f..701b4fb0fb5a 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -205,7 +205,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", i+1, n, global_id, mds, inc, - ceph_pr_addr(&addr.in_addr), + ceph_pr_addr(&addr), ceph_mds_state_name(state)); if (mds < 0 || state <= 0) diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 9455d3aef0c3..c4522212872c 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) static inline bool ceph_has_realms_with_quotas(struct inode *inode) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - return atomic64_read(&mdsc->quotarealms_count) > 0; + struct super_block *sb = mdsc->fsc->sb; + + if (atomic64_read(&mdsc->quotarealms_count) > 0) + return true; + /* if root is the real CephFS root, we don't have quota realms */ + if (sb->s_root->d_inode && + (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT)) + return false; + /* otherwise, we can't know for sure */ + return true; } void ceph_handle_quota(struct ceph_mds_client *mdsc, @@ -68,6 +77,108 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, iput(inode); } +static struct ceph_quotarealm_inode * +find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) +{ + struct ceph_quotarealm_inode *qri = NULL; + struct rb_node **node, *parent = NULL; + + mutex_lock(&mdsc->quotarealms_inodes_mutex); + node = &(mdsc->quotarealms_inodes.rb_node); + while (*node) { + parent = *node; + qri = container_of(*node, struct ceph_quotarealm_inode, node); + + if (ino < qri->ino) + node = &((*node)->rb_left); + else if (ino > qri->ino) + node = &((*node)->rb_right); + else + break; + } + if (!qri || (qri->ino != ino)) { + /* Not found, create a new one and insert it */ + qri = kmalloc(sizeof(*qri), GFP_KERNEL); + if (qri) { + qri->ino = ino; + qri->inode = NULL; + qri->timeout = 0; + mutex_init(&qri->mutex); + rb_link_node(&qri->node, parent, node); + rb_insert_color(&qri->node, &mdsc->quotarealms_inodes); + } else + pr_warn("Failed to alloc quotarealms_inode\n"); + } + mutex_unlock(&mdsc->quotarealms_inodes_mutex); + + return qri; +} + +/* + * This function will try to lookup a realm inode which isn't visible in the + * filesystem mountpoint. A list of these kind of inodes (not visible) is + * maintained in the mdsc and freed only when the filesystem is umounted. + * + * Note that these inodes are kept in this list even if the lookup fails, which + * allows to prevent useless lookup requests. + */ +static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, + struct super_block *sb, + struct ceph_snap_realm *realm) +{ + struct ceph_quotarealm_inode *qri; + struct inode *in; + + qri = find_quotarealm_inode(mdsc, realm->ino); + if (!qri) + return NULL; + + mutex_lock(&qri->mutex); + if (qri->inode) { + /* A request has already returned the inode */ + mutex_unlock(&qri->mutex); + return qri->inode; + } + /* Check if this inode lookup has failed recently */ + if (qri->timeout && + time_before_eq(jiffies, qri->timeout)) { + mutex_unlock(&qri->mutex); + return NULL; + } + in = ceph_lookup_inode(sb, realm->ino); + if (IS_ERR(in)) { + pr_warn("Can't lookup inode %llx (err: %ld)\n", + realm->ino, PTR_ERR(in)); + qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ + } else { + qri->timeout = 0; + qri->inode = in; + } + mutex_unlock(&qri->mutex); + + return in; +} + +void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) +{ + struct ceph_quotarealm_inode *qri; + struct rb_node *node; + + /* + * It should now be safe to clean quotarealms_inode tree without holding + * mdsc->quotarealms_inodes_mutex... + */ + mutex_lock(&mdsc->quotarealms_inodes_mutex); + while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) { + node = rb_first(&mdsc->quotarealms_inodes); + qri = rb_entry(node, struct ceph_quotarealm_inode, node); + rb_erase(node, &mdsc->quotarealms_inodes); + iput(qri->inode); + kfree(qri); + } + mutex_unlock(&mdsc->quotarealms_inodes_mutex); +} + /* * This function walks through the snaprealm for an inode and returns the * ceph_snap_realm for the first snaprealm that has quotas set (either max_files @@ -76,9 +187,15 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, * * Note that the caller is responsible for calling ceph_put_snap_realm() on the * returned realm. + * + * Callers of this function need to hold mdsc->snap_rwsem. However, if there's + * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence + * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false' + * this function will return -EAGAIN; otherwise, the snaprealms walk-through + * will be restarted. */ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, - struct inode *inode) + struct inode *inode, bool retry) { struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; @@ -88,6 +205,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, if (ceph_snap(inode) != CEPH_NOSNAP) return NULL; +restart: realm = ceph_inode(inode)->i_snap_realm; if (realm) ceph_get_snap_realm(mdsc, realm); @@ -95,11 +213,25 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " "null i_snap_realm\n", ceph_vinop(inode)); while (realm) { + bool has_inode; + spin_lock(&realm->inodes_with_caps_lock); - in = realm->inode ? igrab(realm->inode) : NULL; + has_inode = realm->inode; + in = has_inode ? igrab(realm->inode) : NULL; spin_unlock(&realm->inodes_with_caps_lock); - if (!in) + if (has_inode && !in) break; + if (!in) { + up_read(&mdsc->snap_rwsem); + in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm); + down_read(&mdsc->snap_rwsem); + if (IS_ERR_OR_NULL(in)) + break; + ceph_put_snap_realm(mdsc, realm); + if (!retry) + return ERR_PTR(-EAGAIN); + goto restart; + } ci = ceph_inode(in); has_quota = __ceph_has_any_quota(ci); @@ -125,9 +257,22 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) struct ceph_snap_realm *old_realm, *new_realm; bool is_same; +restart: + /* + * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem. + * However, get_quota_realm may drop it temporarily. By setting the + * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was + * dropped and we can then restart the whole operation. + */ down_read(&mdsc->snap_rwsem); - old_realm = get_quota_realm(mdsc, old); - new_realm = get_quota_realm(mdsc, new); + old_realm = get_quota_realm(mdsc, old, true); + new_realm = get_quota_realm(mdsc, new, false); + if (PTR_ERR(new_realm) == -EAGAIN) { + up_read(&mdsc->snap_rwsem); + if (old_realm) + ceph_put_snap_realm(mdsc, old_realm); + goto restart; + } is_same = (old_realm == new_realm); up_read(&mdsc->snap_rwsem); @@ -166,6 +311,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, return false; down_read(&mdsc->snap_rwsem); +restart: realm = ceph_inode(inode)->i_snap_realm; if (realm) ceph_get_snap_realm(mdsc, realm); @@ -173,12 +319,23 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " "null i_snap_realm\n", ceph_vinop(inode)); while (realm) { + bool has_inode; + spin_lock(&realm->inodes_with_caps_lock); - in = realm->inode ? igrab(realm->inode) : NULL; + has_inode = realm->inode; + in = has_inode ? igrab(realm->inode) : NULL; spin_unlock(&realm->inodes_with_caps_lock); - if (!in) + if (has_inode && !in) break; - + if (!in) { + up_read(&mdsc->snap_rwsem); + in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm); + down_read(&mdsc->snap_rwsem); + if (IS_ERR_OR_NULL(in)) + break; + ceph_put_snap_realm(mdsc, realm); + goto restart; + } ci = ceph_inode(in); spin_lock(&ci->i_ceph_lock); if (op == QUOTA_CHECK_MAX_FILES_OP) { @@ -314,7 +471,7 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) bool is_updated = false; down_read(&mdsc->snap_rwsem); - realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); + realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true); up_read(&mdsc->snap_rwsem); if (!realm) return false; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 285edda4fc3b..c864b44c8341 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -845,6 +845,12 @@ static void ceph_umount_begin(struct super_block *sb) return; } +static int ceph_remount(struct super_block *sb, int *flags, char *data) +{ + sync_filesystem(sb); + return 0; +} + static const struct super_operations ceph_super_ops = { .alloc_inode = ceph_alloc_inode, .destroy_inode = ceph_destroy_inode, @@ -853,6 +859,7 @@ static const struct super_operations ceph_super_ops = { .drop_inode = ceph_drop_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, + .remount_fs = ceph_remount, .show_options = ceph_show_options, .statfs = ceph_statfs, .umount_begin = ceph_umount_begin, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c5b4a05905c0..6edab9a750f8 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1083,6 +1083,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* export.c */ extern const struct export_operations ceph_export_ops; +struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino); /* locks.c */ extern __init void ceph_flock_init(void); @@ -1133,5 +1134,6 @@ extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newlen); extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf); +extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc); #endif /* _FS_CEPH_SUPER_H */ diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 6a69f11aacf7..45e74da40f3a 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -380,6 +380,8 @@ skip_rdma: atomic_read(&server->in_send), atomic_read(&server->num_waiters)); #endif + /* dump session id helpful for use with network trace */ + seq_printf(m, " SessionId: 0x%llx", ses->Suid); if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) seq_puts(m, " encrypted"); if (ses->sign) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index b1a5fcfa3ce1..f5fcd6360056 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -878,6 +878,9 @@ out: static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) { + struct cifsFileInfo *cfile = file->private_data; + struct cifs_tcon *tcon; + /* * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate * the cached file length @@ -909,6 +912,12 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) if (rc < 0) return (loff_t)rc; } + if (cfile && cfile->tlink) { + tcon = tlink_tcon(cfile->tlink); + if (tcon->ses->server->ops->llseek) + return tcon->ses->server->ops->llseek(file, tcon, + offset, whence); + } return generic_file_llseek(file, offset, whence); } @@ -1070,11 +1079,6 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, cifs_dbg(FYI, "copychunk range\n"); - if (src_inode == target_inode) { - rc = -EINVAL; - goto out; - } - if (!src_file->private_data || !dst_file->private_data) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 33c251b408aa..334ff5f9c3f3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -497,6 +497,8 @@ struct smb_version_operations { /* version specific fiemap implementation */ int (*fiemap)(struct cifs_tcon *tcon, struct cifsFileInfo *, struct fiemap_extent_info *, u64, u64); + /* version specific llseek implementation */ + loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int); }; struct smb_version_values { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 084756cfdaee..8c4121da624e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -528,6 +528,21 @@ cifs_reconnect(struct TCP_Server_Info *server) /* do not want to be sending data on a socket we are freeing */ cifs_dbg(FYI, "%s: tearing down socket\n", __func__); mutex_lock(&server->srv_mutex); + if (server->ssocket) { + cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", + server->ssocket->state, server->ssocket->flags); + kernel_sock_shutdown(server->ssocket, SHUT_WR); + cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", + server->ssocket->state, server->ssocket->flags); + sock_release(server->ssocket); + server->ssocket = NULL; + } + server->sequence_number = 0; + server->session_estab = false; + kfree(server->session_key.response); + server->session_key.response = NULL; + server->session_key.len = 0; + server->lstrp = jiffies; /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); @@ -540,6 +555,7 @@ cifs_reconnect(struct TCP_Server_Info *server) list_move(&mid_entry->qhead, &retry_list); } spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_safe(tmp, tmp2, &retry_list) { @@ -548,24 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server) mid_entry->callback(mid_entry); } - if (server->ssocket) { - cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", - server->ssocket->state, server->ssocket->flags); - kernel_sock_shutdown(server->ssocket, SHUT_WR); - cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", - server->ssocket->state, server->ssocket->flags); - sock_release(server->ssocket); - server->ssocket = NULL; - } else if (cifs_rdma_enabled(server)) + if (cifs_rdma_enabled(server)) { + mutex_lock(&server->srv_mutex); smbd_destroy(server); - server->sequence_number = 0; - server->session_estab = false; - kfree(server->session_key.response); - server->session_key.response = NULL; - server->session_key.len = 0; - server->lstrp = jiffies; - - mutex_unlock(&server->srv_mutex); + mutex_unlock(&server->srv_mutex); + } do { try_to_freeze(); @@ -2443,6 +2446,10 @@ match_port(struct TCP_Server_Info *server, struct sockaddr *addr) { __be16 port, *sport; + /* SMBDirect manages its own ports, don't match it here */ + if (server->rdma) + return true; + switch (addr->sa_family) { case AF_INET: sport = &((struct sockaddr_in *) &server->dstaddr)->sin_port; diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 7ede7306599f..1e21b2528cfb 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -77,7 +77,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) goto name_is_IP_address; /* Perform the upcall */ - rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); + rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false); if (rc < 0) cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a930c8965e5c..e921e6511728 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * SMB2 version specific operations * @@ -282,7 +283,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf) __u64 wire_mid = le64_to_cpu(shdr->MessageId); if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) { - cifs_dbg(VFS, "encrypted frame parsing not supported yet"); + cifs_dbg(VFS, "Encrypted frame parsing not supported yet\n"); return NULL; } @@ -324,6 +325,7 @@ static int smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) { int rc; + ses->server->CurrentMid = 0; rc = SMB2_negotiate(xid, ses); /* BB we probably don't need to retry with modern servers */ @@ -789,8 +791,6 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); else close_shroot(&tcon->crfid); - - return; } static void @@ -818,7 +818,6 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, FS_DEVICE_INFORMATION); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); - return; } static int @@ -906,9 +905,8 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, value = &src->ea_data[src->ea_name_length + 1]; value_len = (size_t)le16_to_cpu(src->ea_value_length); - if (name_len == 0) { + if (name_len == 0) break; - } if (src_size < 8 + name_len + 1 + value_len) { cifs_dbg(FYI, "EA entry goes beyond length of list\n"); @@ -1161,6 +1159,7 @@ static void smb2_clear_stats(struct cifs_tcon *tcon) { int i; + for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++) { atomic_set(&tcon->stats.smb2_stats.smb2_com_sent[i], 0); atomic_set(&tcon->stats.smb2_stats.smb2_com_failed[i], 0); @@ -1529,7 +1528,7 @@ smb2_copychunk_range(const unsigned int xid, if (pcchunk == NULL) return -ENOMEM; - cifs_dbg(FYI, "in smb2_copychunk_range - about to call request res key\n"); + cifs_dbg(FYI, "%s: about to call request res key\n", __func__); /* Request a key from the server to identify the source of the copy */ rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink), srcfile->fid.persistent_fid, @@ -1649,6 +1648,7 @@ static unsigned int smb2_read_data_offset(char *buf) { struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; + return rsp->DataOffset; } @@ -1777,7 +1777,7 @@ smb2_duplicate_extents(const unsigned int xid, dup_ext_buf.SourceFileOffset = cpu_to_le64(src_off); dup_ext_buf.TargetFileOffset = cpu_to_le64(dest_off); dup_ext_buf.ByteCount = cpu_to_le64(len); - cifs_dbg(FYI, "duplicate extents: src off %lld dst off %lld len %lld", + cifs_dbg(FYI, "Duplicate extents: src off %lld dst off %lld len %lld\n", src_off, dest_off, len); rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); @@ -1794,7 +1794,7 @@ smb2_duplicate_extents(const unsigned int xid, &ret_data_len); if (ret_data_len > 0) - cifs_dbg(FYI, "non-zero response length in duplicate extents"); + cifs_dbg(FYI, "Non-zero response length in duplicate extents\n"); duplicate_extents_out: return rc; @@ -1983,9 +1983,9 @@ smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon, } /* -* If we negotiate SMB2 protocol and get STATUS_PENDING - update -* the number of credits and return true. Otherwise - return false. -*/ + * If we negotiate SMB2 protocol and get STATUS_PENDING - update + * the number of credits and return true. Otherwise - return false. + */ static bool smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) { @@ -2306,7 +2306,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, struct get_dfs_referral_rsp *dfs_rsp = NULL; u32 dfs_req_size = 0, dfs_rsp_size = 0; - cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name); + cifs_dbg(FYI, "%s: path: %s\n", __func__, search_name); /* * Try to use the IPC tcon, otherwise just use any @@ -2360,7 +2360,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, if (rc) { if ((rc != -ENOENT) && (rc != -EOPNOTSUPP)) - cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); + cifs_dbg(VFS, "ioctl error in %s rc=%d\n", __func__, rc); goto out; } @@ -2369,7 +2369,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, nls_codepage, remap, search_name, true /* is_unicode */); if (rc) { - cifs_dbg(VFS, "parse error in smb2_get_dfs_refer rc=%d\n", rc); + cifs_dbg(VFS, "parse error in %s rc=%d\n", __func__, rc); goto out; } @@ -2745,7 +2745,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, inode = d_inode(cfile->dentry); cifsi = CIFS_I(inode); - trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, + trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); @@ -2759,7 +2759,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, return rc; } - cifs_dbg(FYI, "offset %lld len %lld", offset, len); + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); fsctl_buf.FileOffset = cpu_to_le64(offset); fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); @@ -2816,7 +2816,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, return rc; } - cifs_dbg(FYI, "offset %lld len %lld", offset, len); + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); fsctl_buf.FileOffset = cpu_to_le64(offset); fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); @@ -2922,6 +2922,90 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, return rc; } +static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offset, int whence) +{ + struct cifsFileInfo *wrcfile, *cfile = file->private_data; + struct cifsInodeInfo *cifsi; + struct inode *inode; + int rc = 0; + struct file_allocated_range_buffer in_data, *out_data = NULL; + u32 out_data_len; + unsigned int xid; + + if (whence != SEEK_HOLE && whence != SEEK_DATA) + return generic_file_llseek(file, offset, whence); + + inode = d_inode(cfile->dentry); + cifsi = CIFS_I(inode); + + if (offset < 0 || offset >= i_size_read(inode)) + return -ENXIO; + + xid = get_xid(); + /* + * We need to be sure that all dirty pages are written as they + * might fill holes on the server. + * Note that we also MUST flush any written pages since at least + * some servers (Windows2016) will not reflect recent writes in + * QUERY_ALLOCATED_RANGES until SMB2_flush is called. + */ + wrcfile = find_writable_file(cifsi, false); + if (wrcfile) { + filemap_write_and_wait(inode->i_mapping); + smb2_flush_file(xid, tcon, &wrcfile->fid); + cifsFileInfo_put(wrcfile); + } + + if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) { + if (whence == SEEK_HOLE) + offset = i_size_read(inode); + goto lseek_exit; + } + + in_data.file_offset = cpu_to_le64(offset); + in_data.length = cpu_to_le64(i_size_read(inode)); + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + FSCTL_QUERY_ALLOCATED_RANGES, true, + (char *)&in_data, sizeof(in_data), + sizeof(struct file_allocated_range_buffer), + (char **)&out_data, &out_data_len); + if (rc == -E2BIG) + rc = 0; + if (rc) + goto lseek_exit; + + if (whence == SEEK_HOLE && out_data_len == 0) + goto lseek_exit; + + if (whence == SEEK_DATA && out_data_len == 0) { + rc = -ENXIO; + goto lseek_exit; + } + + if (out_data_len < sizeof(struct file_allocated_range_buffer)) { + rc = -EINVAL; + goto lseek_exit; + } + if (whence == SEEK_DATA) { + offset = le64_to_cpu(out_data->file_offset); + goto lseek_exit; + } + if (offset < le64_to_cpu(out_data->file_offset)) + goto lseek_exit; + + offset = le64_to_cpu(out_data->file_offset) + le64_to_cpu(out_data->length); + + lseek_exit: + free_xid(xid); + kfree(out_data); + if (!rc) + return vfs_setpos(file, offset, inode->i_sb->s_maxbytes); + else + return rc; +} + static int smb3_fiemap(struct cifs_tcon *tcon, struct cifsFileInfo *cfile, struct fiemap_extent_info *fei, u64 start, u64 len) @@ -3384,7 +3468,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, req = aead_request_alloc(tfm, GFP_KERNEL); if (!req) { - cifs_dbg(VFS, "%s: Failed to alloc aead request", __func__); + cifs_dbg(VFS, "%s: Failed to alloc aead request\n", __func__); return -ENOMEM; } @@ -3395,7 +3479,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, sg = init_sg(num_rqst, rqst, sign); if (!sg) { - cifs_dbg(VFS, "%s: Failed to init sg", __func__); + cifs_dbg(VFS, "%s: Failed to init sg\n", __func__); rc = -ENOMEM; goto free_req; } @@ -3403,7 +3487,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, iv_len = crypto_aead_ivsize(tfm); iv = kzalloc(iv_len, GFP_KERNEL); if (!iv) { - cifs_dbg(VFS, "%s: Failed to alloc IV", __func__); + cifs_dbg(VFS, "%s: Failed to alloc iv\n", __func__); rc = -ENOMEM; goto free_sg; } @@ -3511,7 +3595,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, fill_transform_hdr(tr_hdr, orig_len, old_rq); rc = crypt_message(server, num_rqst, new_rq, 1); - cifs_dbg(FYI, "encrypt message returned %d", rc); + cifs_dbg(FYI, "Encrypt message returned %d\n", rc); if (rc) goto err_free; @@ -3552,7 +3636,7 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, rqst.rq_tailsz = (page_data_size % PAGE_SIZE) ? : PAGE_SIZE; rc = crypt_message(server, 1, &rqst, 0); - cifs_dbg(FYI, "decrypt message returned %d\n", rc); + cifs_dbg(FYI, "Decrypt message returned %d\n", rc); if (rc) return rc; @@ -4166,6 +4250,7 @@ struct smb_version_operations smb20_operations = { .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, .fiemap = smb3_fiemap, + .llseek = smb3_llseek, }; struct smb_version_operations smb21_operations = { @@ -4266,6 +4351,7 @@ struct smb_version_operations smb21_operations = { .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, .fiemap = smb3_fiemap, + .llseek = smb3_llseek, }; struct smb_version_operations smb30_operations = { @@ -4375,6 +4461,7 @@ struct smb_version_operations smb30_operations = { .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, .fiemap = smb3_fiemap, + .llseek = smb3_llseek, }; struct smb_version_operations smb311_operations = { @@ -4485,6 +4572,7 @@ struct smb_version_operations smb311_operations = { .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, .fiemap = smb3_fiemap, + .llseek = smb3_llseek, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 29f011d8d8e2..710ceb875161 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2538,11 +2538,25 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, struct kvec *iov = rqst->rq_iov; unsigned int total_len; int rc; + char *in_data_buf; rc = smb2_plain_req_init(SMB2_IOCTL, tcon, (void **) &req, &total_len); if (rc) return rc; + if (indatalen) { + /* + * indatalen is usually small at a couple of bytes max, so + * just allocate through generic pool + */ + in_data_buf = kmalloc(indatalen, GFP_NOFS); + if (!in_data_buf) { + cifs_small_buf_release(req); + return -ENOMEM; + } + memcpy(in_data_buf, in_data, indatalen); + } + req->CtlCode = cpu_to_le32(opcode); req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; @@ -2563,7 +2577,7 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer)); rqst->rq_nvec = 2; iov[0].iov_len = total_len - 1; - iov[1].iov_base = in_data; + iov[1].iov_base = in_data_buf; iov[1].iov_len = indatalen; } else { rqst->rq_nvec = 1; @@ -2605,8 +2619,11 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, void SMB2_ioctl_free(struct smb_rqst *rqst) { - if (rqst && rqst->rq_iov) + if (rqst && rqst->rq_iov) { cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ + if (rqst->rq_iov[1].iov_len) + kfree(rqst->rq_iov[1].iov_base); + } } diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 251ef1223206..caac37b1de8c 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -903,7 +903,7 @@ static int smbd_create_header(struct smbd_connection *info, request->sge[0].addr = ib_dma_map_single(info->id->device, (void *)packet, header_length, - DMA_BIDIRECTIONAL); + DMA_TO_DEVICE); if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { mempool_free(request, info->request_mempool); rc = -EIO; @@ -1005,7 +1005,7 @@ static int smbd_post_send_sgl(struct smbd_connection *info, for_each_sg(sgl, sg, num_sgs, i) { request->sge[i+1].addr = ib_dma_map_page(info->id->device, sg_page(sg), - sg->offset, sg->length, DMA_BIDIRECTIONAL); + sg->offset, sg->length, DMA_TO_DEVICE); if (ib_dma_mapping_error( info->id->device, request->sge[i+1].addr)) { rc = -EIO; @@ -2110,8 +2110,10 @@ int smbd_send(struct TCP_Server_Info *server, goto done; } - rqst_idx = 0; + log_write(INFO, "num_rqst=%d total length=%u\n", + num_rqst, remaining_data_length); + rqst_idx = 0; next_rqst: rqst = &rqst_array[rqst_idx]; iov = rqst->rq_iov; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 9a16ff4b9f5e..60661b3f983a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -33,7 +33,7 @@ #include <linux/uaccess.h> #include <asm/processor.h> #include <linux/mempool.h> -#include <linux/signal.h> +#include <linux/sched/signal.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 591e82ba443c..5e7932d668ab 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1757,12 +1757,19 @@ int configfs_register_group(struct config_group *parent_group, inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); ret = create_default_group(parent_group, group); - if (!ret) { - spin_lock(&configfs_dirent_lock); - configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); - spin_unlock(&configfs_dirent_lock); - } + if (ret) + goto err_out; + + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); + inode_unlock(d_inode(parent)); + return 0; +err_out: inode_unlock(d_inode(parent)); + mutex_lock(&subsys->su_mutex); + unlink_group(group); + mutex_unlock(&subsys->su_mutex); return ret; } EXPORT_SYMBOL(configfs_register_group); diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 968f163b5feb..8e83741b02e0 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -142,7 +142,8 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) struct inode *inode; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_map_blocks map; - u32 i = 0, err = 0, num, n; + u32 i = 0, num; + int err = 0, n; if ((ino < EXT4_ROOT_INO) || (ino > le32_to_cpu(sbi->s_es->s_inodes_count))) @@ -276,6 +277,11 @@ int ext4_check_blockref(const char *function, unsigned int line, __le32 *bref = p; unsigned int blk; + if (ext4_has_feature_journal(inode->i_sb) && + (inode->i_ino == + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + return 0; + while (bref < p+max) { blk = le32_to_cpu(*bref++); if (blk && diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0f89f5190cd7..f2c62e2a0c98 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1035,6 +1035,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, __le32 border; ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ int err = 0; + size_t ext_size = 0; /* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */ @@ -1126,6 +1127,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries); + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1205,6 +1210,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries)); + memset(bh->b_data + ext_size, 0, + inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1270,6 +1280,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock, goal = 0; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err = 0; + size_t ext_size = 0; /* Try to prepend new index to old one */ if (ext_depth(inode)) @@ -1295,9 +1306,11 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, goto out; } + ext_size = sizeof(EXT4_I(inode)->i_data); /* move top-level index/leaf into new block */ - memmove(bh->b_data, EXT4_I(inode)->i_data, - sizeof(EXT4_I(inode)->i_data)); + memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size); + /* zero out unused area in the extent block */ + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); /* set size of new block */ neh = ext_block_hdr(bh); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 98ec11f69cd4..2c5baa5e8291 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -264,6 +264,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } ret = __generic_file_write_iter(iocb, from); + /* + * Unaligned direct AIO must be the only IO in flight. Otherwise + * overlapping aligned IO after unaligned might result in data + * corruption. + */ + if (ret == -EIOCBQUEUED && unaligned_aio) + ext4_unwritten_wait(inode); inode_unlock(inode); if (ret > 0) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 4b99e2db95b8..dbccf46f1770 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -626,7 +626,7 @@ int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head, { struct ext4_fsmap dkeys[2]; /* per-dev keys */ struct ext4_getfsmap_dev handlers[EXT4_GETFSMAP_DEVS]; - struct ext4_getfsmap_info info = {0}; + struct ext4_getfsmap_info info = { NULL }; int i; int error = 0; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7e85ecf0b849..e486e49b31ed 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -608,7 +608,7 @@ static int ext4_getfsmap_format(struct ext4_fsmap *xfm, void *priv) static int ext4_ioc_getfsmap(struct super_block *sb, struct fsmap_head __user *arg) { - struct getfsmap_info info = {0}; + struct getfsmap_info info = { NULL }; struct ext4_fsmap_head xhead = {0}; struct fsmap_head head; bool aborted = false; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6d50f53b7a15..cd01c4a67ffb 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -872,12 +872,15 @@ static void dx_release(struct dx_frame *frames) { struct dx_root_info *info; int i; + unsigned int indirect_levels; if (frames[0].bh == NULL) return; info = &((struct dx_root *)frames[0].bh->b_data)->info; - for (i = 0; i <= info->indirect_levels; i++) { + /* save local copy, "info" may be freed after brelse() */ + indirect_levels = info->indirect_levels; + for (i = 0; i <= indirect_levels; i++) { if (frames[i].bh == NULL) break; brelse(frames[i].bh); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f71b5254a990..4079605d437a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -699,7 +699,7 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) { + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { if (EXT4_SB(sb)->s_journal && !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) return; @@ -4661,7 +4661,7 @@ failed_mount: #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(get_qf_name(sb, sbi, i)); #endif ext4_blkdev_remove(sbi); brelse(bh); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 36855c1f8daf..b16645b417d9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -523,8 +523,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) isw->inode = inode; - atomic_inc(&isw_nr_in_flight); - /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the i_page @@ -532,6 +530,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); + + atomic_inc(&isw_nr_in_flight); + goto out_unlock; out_free: @@ -901,7 +902,11 @@ restart: void cgroup_writeback_umount(void) { if (atomic_read(&isw_nr_in_flight)) { - synchronize_rcu(); + /* + * Use rcu_barrier() to wait for all pending callbacks to + * ensure that all in-flight wb switches are in the workqueue. + */ + rcu_barrier(); flush_workqueue(isw_wq); } } diff --git a/fs/fsopen.c b/fs/fsopen.c index 3bb9c0c8cbcc..c2891e933ef1 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -92,7 +92,7 @@ static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags) { int fd; - fd = anon_inode_getfd("fscontext", &fscontext_fops, fc, + fd = anon_inode_getfd("[fscontext]", &fscontext_fops, fc, O_RDWR | o_flags); if (fd < 0) put_fs_context(fc); diff --git a/fs/io_uring.c b/fs/io_uring.c index fdc18321d70c..310f8d17c53e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -231,7 +231,6 @@ struct io_ring_ctx { struct task_struct *sqo_thread; /* if using sq thread polling */ struct mm_struct *sqo_mm; wait_queue_head_t sqo_wait; - unsigned sqo_stop; struct { /* CQ ring */ @@ -329,9 +328,8 @@ struct io_kiocb { #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ -#define REQ_F_PREPPED 16 /* prep already done */ -#define REQ_F_IO_DRAIN 32 /* drain existing IO first */ -#define REQ_F_IO_DRAINED 64 /* drain done */ +#define REQ_F_IO_DRAIN 16 /* drain existing IO first */ +#define REQ_F_IO_DRAINED 32 /* drain done */ u64 user_data; u32 error; /* iopoll result from callback */ u32 sequence; @@ -490,7 +488,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) } static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, - long res, unsigned ev_flags) + long res) { struct io_uring_cqe *cqe; @@ -503,7 +501,7 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, if (cqe) { WRITE_ONCE(cqe->user_data, ki_user_data); WRITE_ONCE(cqe->res, res); - WRITE_ONCE(cqe->flags, ev_flags); + WRITE_ONCE(cqe->flags, 0); } else { unsigned overflow = READ_ONCE(ctx->cq_ring->overflow); @@ -522,12 +520,12 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) } static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, - long res, unsigned ev_flags) + long res) { unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); - io_cqring_fill_event(ctx, user_data, res, ev_flags); + io_cqring_fill_event(ctx, user_data, res); io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); @@ -629,7 +627,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, list); list_del(&req->list); - io_cqring_fill_event(ctx, req->user_data, req->error, 0); + io_cqring_fill_event(ctx, req->user_data, req->error); (*nr_events)++; if (refcount_dec_and_test(&req->refs)) { @@ -777,7 +775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); - io_cqring_add_event(req->ctx, req->user_data, res, 0); + io_cqring_add_event(req->ctx, req->user_data, res); io_put_req(req); } @@ -896,9 +894,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, if (!req->file) return -EBADF; - /* For -EAGAIN retry, everything is already prepped */ - if (req->flags & REQ_F_PREPPED) - return 0; if (force_nonblock && !io_file_supports_async(req->file)) force_nonblock = false; @@ -941,7 +936,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; kiocb->ki_complete = io_complete_rw; } - req->flags |= REQ_F_PREPPED; return 0; } @@ -1216,7 +1210,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - io_cqring_add_event(ctx, user_data, err, 0); + io_cqring_add_event(ctx, user_data, err); io_put_req(req); return 0; } @@ -1227,16 +1221,12 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!req->file) return -EBADF; - /* Prep already done (EAGAIN retry) */ - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) return -EINVAL; - req->flags |= REQ_F_PREPPED; return 0; } @@ -1265,7 +1255,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); + io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } @@ -1277,16 +1267,12 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!req->file) return -EBADF; - /* Prep already done (EAGAIN retry) */ - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) return -EINVAL; - req->flags |= REQ_F_PREPPED; return ret; } @@ -1313,7 +1299,7 @@ static int io_sync_file_range(struct io_kiocb *req, ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); + io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } @@ -1371,7 +1357,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) } spin_unlock_irq(&ctx->completion_lock); - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); + io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } @@ -1380,7 +1366,7 @@ static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req, __poll_t mask) { req->poll.done = true; - io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0); + io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask)); io_commit_cqring(ctx); } @@ -1700,7 +1686,7 @@ restart: io_put_req(req); if (ret) { - io_cqring_add_event(ctx, sqe->user_data, ret, 0); + io_cqring_add_event(ctx, sqe->user_data, ret); io_put_req(req); } @@ -2005,7 +1991,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, continue; } - io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0); + io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret); } if (statep) @@ -2028,7 +2014,7 @@ static int io_sq_thread(void *data) set_fs(USER_DS); timeout = inflight = 0; - while (!kthread_should_stop() && !ctx->sqo_stop) { + while (!kthread_should_park()) { bool all_fixed, mm_fault = false; int i; @@ -2090,7 +2076,7 @@ static int io_sq_thread(void *data) smp_mb(); if (!io_get_sqring(ctx, &sqes[0])) { - if (kthread_should_stop()) { + if (kthread_should_park()) { finish_wait(&ctx->sqo_wait, &wait); break; } @@ -2140,8 +2126,7 @@ static int io_sq_thread(void *data) mmput(cur_mm); } - if (kthread_should_park()) - kthread_parkme(); + kthread_parkme(); return 0; } @@ -2170,7 +2155,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) ret = io_submit_sqe(ctx, &s, statep); if (ret) - io_cqring_add_event(ctx, s.sqe->user_data, ret, 0); + io_cqring_add_event(ctx, s.sqe->user_data, ret); } io_commit_sqring(ctx); @@ -2182,6 +2167,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) static unsigned io_cqring_events(struct io_cq_ring *ring) { + /* See comment at the top of this file */ + smp_rmb(); return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head); } @@ -2194,11 +2181,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, { struct io_cq_ring *ring = ctx->cq_ring; sigset_t ksigmask, sigsaved; - DEFINE_WAIT(wait); int ret; - /* See comment at the top of this file */ - smp_rmb(); if (io_cqring_events(ring) >= min_events) return 0; @@ -2216,23 +2200,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - do { - prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE); - - ret = 0; - /* See comment at the top of this file */ - smp_rmb(); - if (io_cqring_events(ring) >= min_events) - break; - - schedule(); - + ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); + if (ret == -ERESTARTSYS) ret = -EINTR; - if (signal_pending(current)) - break; - } while (1); - - finish_wait(&ctx->wait, &wait); if (sig) restore_user_sigmask(sig, &sigsaved); @@ -2273,8 +2243,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_stop(struct io_ring_ctx *ctx) { if (ctx->sqo_thread) { - ctx->sqo_stop = 1; - mb(); + /* + * The park is a bit of a work-around, without it we get + * warning spews on shutdown with SQPOLL set and affinity + * set to a single CPU. + */ kthread_park(ctx->sqo_thread); kthread_stop(ctx->sqo_thread); ctx->sqo_thread = NULL; @@ -2467,10 +2440,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, ctx->sq_thread_idle = HZ; if (p->flags & IORING_SETUP_SQ_AFF) { - int cpu = array_index_nospec(p->sq_thread_cpu, - nr_cpu_ids); + int cpu = p->sq_thread_cpu; ret = -EINVAL; + if (cpu >= nr_cpu_ids) + goto err; if (!cpu_online(cpu)) goto err; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 37e16d969925..43df0c943229 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2375,22 +2375,19 @@ static struct kmem_cache *jbd2_journal_head_cache; static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif -static int jbd2_journal_init_journal_head_cache(void) +static int __init jbd2_journal_init_journal_head_cache(void) { - int retval; - - J_ASSERT(jbd2_journal_head_cache == NULL); + J_ASSERT(!jbd2_journal_head_cache); jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", sizeof(struct journal_head), 0, /* offset */ SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, NULL); /* ctor */ - retval = 0; if (!jbd2_journal_head_cache) { - retval = -ENOMEM; printk(KERN_EMERG "JBD2: no memory for journal_head cache\n"); + return -ENOMEM; } - return retval; + return 0; } static void jbd2_journal_destroy_journal_head_cache(void) @@ -2636,28 +2633,38 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void) struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; +static int __init jbd2_journal_init_inode_cache(void) +{ + J_ASSERT(!jbd2_inode_cache); + jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0); + if (!jbd2_inode_cache) { + pr_emerg("JBD2: failed to create inode cache\n"); + return -ENOMEM; + } + return 0; +} + static int __init jbd2_journal_init_handle_cache(void) { + J_ASSERT(!jbd2_handle_cache); jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); - if (jbd2_handle_cache == NULL) { + if (!jbd2_handle_cache) { printk(KERN_EMERG "JBD2: failed to create handle cache\n"); return -ENOMEM; } - jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0); - if (jbd2_inode_cache == NULL) { - printk(KERN_EMERG "JBD2: failed to create inode cache\n"); - kmem_cache_destroy(jbd2_handle_cache); - return -ENOMEM; - } return 0; } +static void jbd2_journal_destroy_inode_cache(void) +{ + kmem_cache_destroy(jbd2_inode_cache); + jbd2_inode_cache = NULL; +} + static void jbd2_journal_destroy_handle_cache(void) { kmem_cache_destroy(jbd2_handle_cache); jbd2_handle_cache = NULL; - kmem_cache_destroy(jbd2_inode_cache); - jbd2_inode_cache = NULL; } /* @@ -2668,21 +2675,27 @@ static int __init journal_init_caches(void) { int ret; - ret = jbd2_journal_init_revoke_caches(); + ret = jbd2_journal_init_revoke_record_cache(); + if (ret == 0) + ret = jbd2_journal_init_revoke_table_cache(); if (ret == 0) ret = jbd2_journal_init_journal_head_cache(); if (ret == 0) ret = jbd2_journal_init_handle_cache(); if (ret == 0) + ret = jbd2_journal_init_inode_cache(); + if (ret == 0) ret = jbd2_journal_init_transaction_cache(); return ret; } static void jbd2_journal_destroy_caches(void) { - jbd2_journal_destroy_revoke_caches(); + jbd2_journal_destroy_revoke_record_cache(); + jbd2_journal_destroy_revoke_table_cache(); jbd2_journal_destroy_journal_head_cache(); jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_inode_cache(); jbd2_journal_destroy_transaction_cache(); jbd2_journal_destroy_slabs(); } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a1143e57a718..69b9bc329964 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -178,33 +178,41 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } -void jbd2_journal_destroy_revoke_caches(void) +void jbd2_journal_destroy_revoke_record_cache(void) { kmem_cache_destroy(jbd2_revoke_record_cache); jbd2_revoke_record_cache = NULL; +} + +void jbd2_journal_destroy_revoke_table_cache(void) +{ kmem_cache_destroy(jbd2_revoke_table_cache); jbd2_revoke_table_cache = NULL; } -int __init jbd2_journal_init_revoke_caches(void) +int __init jbd2_journal_init_revoke_record_cache(void) { J_ASSERT(!jbd2_revoke_record_cache); - J_ASSERT(!jbd2_revoke_table_cache); - jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); - if (!jbd2_revoke_record_cache) - goto record_cache_failure; + if (!jbd2_revoke_record_cache) { + pr_emerg("JBD2: failed to create revoke_record cache\n"); + return -ENOMEM; + } + return 0; +} + +int __init jbd2_journal_init_revoke_table_cache(void) +{ + J_ASSERT(!jbd2_revoke_table_cache); jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, SLAB_TEMPORARY); - if (!jbd2_revoke_table_cache) - goto table_cache_failure; - return 0; -table_cache_failure: - jbd2_journal_destroy_revoke_caches(); -record_cache_failure: + if (!jbd2_revoke_table_cache) { + pr_emerg("JBD2: failed to create revoke_table cache\n"); return -ENOMEM; + } + return 0; } static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f940d31c2adc..8ca4fddc705f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -42,9 +42,11 @@ int __init jbd2_journal_init_transaction_cache(void) 0, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL); - if (transaction_cache) - return 0; - return -ENOMEM; + if (!transaction_cache) { + pr_emerg("JBD2: failed to create transaction cache\n"); + return -ENOMEM; + } + return 0; } void jbd2_journal_destroy_transaction_cache(void) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 70f520b41a19..5fb4f8910aab 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -56,7 +56,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; int status; - status = lockd_up(nlm_init->net); + status = lockd_up(nlm_init->net, nlm_init->cred); if (status < 0) return ERR_PTR(status); @@ -241,7 +241,7 @@ reclaimer(void *ptr) allow_signal(SIGKILL); down_write(&host->h_rwsem); - lockd_up(net); /* note: this cannot fail as lockd is already running */ + lockd_up(net, NULL); /* note: this cannot fail as lockd is already running */ dprintk("lockd: reclaiming locks for host %s\n", host->h_name); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 346ed161756d..3056f3a0c270 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -188,28 +188,31 @@ lockd(void *vrqstp) static int create_lockd_listener(struct svc_serv *serv, const char *name, struct net *net, const int family, - const unsigned short port) + const unsigned short port, + const struct cred *cred) { struct svc_xprt *xprt; xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) return svc_create_xprt(serv, name, net, family, port, - SVC_SOCK_DEFAULTS); + SVC_SOCK_DEFAULTS, cred); svc_xprt_put(xprt); return 0; } static int create_lockd_family(struct svc_serv *serv, struct net *net, - const int family) + const int family, const struct cred *cred) { int err; - err = create_lockd_listener(serv, "udp", net, family, nlm_udpport); + err = create_lockd_listener(serv, "udp", net, family, nlm_udpport, + cred); if (err < 0) return err; - return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport); + return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport, + cred); } /* @@ -222,16 +225,17 @@ static int create_lockd_family(struct svc_serv *serv, struct net *net, * Returns zero if all listeners are available; otherwise a * negative errno value is returned. */ -static int make_socks(struct svc_serv *serv, struct net *net) +static int make_socks(struct svc_serv *serv, struct net *net, + const struct cred *cred) { static int warned; int err; - err = create_lockd_family(serv, net, PF_INET); + err = create_lockd_family(serv, net, PF_INET, cred); if (err < 0) goto out_err; - err = create_lockd_family(serv, net, PF_INET6); + err = create_lockd_family(serv, net, PF_INET6, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_err; @@ -246,7 +250,8 @@ out_err: return err; } -static int lockd_up_net(struct svc_serv *serv, struct net *net) +static int lockd_up_net(struct svc_serv *serv, struct net *net, + const struct cred *cred) { struct lockd_net *ln = net_generic(net, lockd_net_id); int error; @@ -258,7 +263,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) if (error) goto err_bind; - error = make_socks(serv, net); + error = make_socks(serv, net, cred); if (error < 0) goto err_bind; set_grace_period(net); @@ -461,7 +466,7 @@ static struct svc_serv *lockd_create_svc(void) /* * Bring up the lockd process if it's not already up. */ -int lockd_up(struct net *net) +int lockd_up(struct net *net, const struct cred *cred) { struct svc_serv *serv; int error; @@ -474,7 +479,7 @@ int lockd_up(struct net *net) goto err_create; } - error = lockd_up_net(serv, net); + error = lockd_up_net(serv, net, cred); if (error < 0) { lockd_unregister_notifiers(); goto err_put; @@ -807,5 +812,7 @@ static struct svc_program nlmsvc_program = { .pg_name = "lockd", /* service name */ .pg_class = "nfsd", /* share authentication with nfsd */ .pg_stats = &nlmsvc_stats, /* stats table */ - .pg_authenticate = &lockd_authenticate /* export authentication */ + .pg_authenticate = &lockd_authenticate, /* export authentication */ + .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, }; diff --git a/fs/locks.c b/fs/locks.c index d7c05dde4ed8..8af49f89ac2f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -352,6 +352,12 @@ EXPORT_SYMBOL_GPL(locks_alloc_lock); void locks_release_private(struct file_lock *fl) { + BUG_ON(waitqueue_active(&fl->fl_wait)); + BUG_ON(!list_empty(&fl->fl_list)); + BUG_ON(!list_empty(&fl->fl_blocked_requests)); + BUG_ON(!list_empty(&fl->fl_blocked_member)); + BUG_ON(!hlist_unhashed(&fl->fl_link)); + if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) fl->fl_ops->fl_release_private(fl); @@ -371,12 +377,6 @@ EXPORT_SYMBOL_GPL(locks_release_private); /* Free a lock which is not in use. */ void locks_free_lock(struct file_lock *fl) { - BUG_ON(waitqueue_active(&fl->fl_wait)); - BUG_ON(!list_empty(&fl->fl_list)); - BUG_ON(!list_empty(&fl->fl_blocked_requests)); - BUG_ON(!list_empty(&fl->fl_blocked_member)); - BUG_ON(!hlist_unhashed(&fl->fl_link)); - locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 0b602a39dd71..7817ad94a6ba 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -41,11 +41,13 @@ static struct svc_program nfs4_callback_program; static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) { + const struct cred *cred = current_cred(); int ret; struct nfs_net *nn = net_generic(net, nfs_net_id); ret = svc_create_xprt(serv, "tcp", net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; @@ -53,7 +55,8 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) nn->nfs_callback_tcpport, PF_INET, net->ns.inum); ret = svc_create_xprt(serv, "tcp", net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", @@ -457,4 +460,6 @@ static struct svc_program nfs4_callback_program = { .pg_class = "nfs", /* authentication class */ .pg_stats = &nfs4_callback_stats, .pg_authenticate = nfs_callback_authenticate, + .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, }; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 06233bfa6d73..73a5a5ea2976 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -983,7 +983,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) out_invalidcred: pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n"); - return rpc_autherr_badcred; + return svc_return_autherr(rqstp, rpc_autherr_badcred); } /* diff --git a/fs/nfs/client.c b/fs/nfs/client.c index da74c4c4a244..3d04cb0b839e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -558,6 +558,7 @@ static int nfs_start_lockd(struct nfs_server *server) 1 : 0, .net = clp->cl_net, .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops, + .cred = current_cred(), }; if (nlm_init.nfs_version > 3) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index a7d3df85736d..e6a700f01452 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -22,7 +22,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, char *ip_addr = NULL; int ip_len; - ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); + ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false); if (ip_len > 0) ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 802993d8912f..baa01956a5b3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -570,13 +570,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); + exp.ex_anon_uid= make_kuid(current_user_ns(), an_int); /* anon gid */ err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); + exp.ex_anon_gid= make_kgid(current_user_ns(), an_int); /* fsid */ err = get_int(&mesg, &an_int); @@ -1170,15 +1170,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp) static void exp_flags(struct seq_file *m, int flag, int fsid, kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc) { + struct user_namespace *userns = m->file->f_cred->user_ns; + show_expflags(m, flag, NFSEXP_ALLFLAGS); if (flag & NFSEXP_FSID) seq_printf(m, ",fsid=%d", fsid); - if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) && - !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2))) - seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu)); - if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) && - !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2))) - seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong)); + if (!uid_eq(anonu, make_kuid(userns, (uid_t)-2)) && + !uid_eq(anonu, make_kuid(userns, 0x10000-2))) + seq_printf(m, ",anonuid=%u", from_kuid_munged(userns, anonu)); + if (!gid_eq(anong, make_kgid(userns, (gid_t)-2)) && + !gid_eq(anong, make_kgid(userns, 0x10000-2))) + seq_printf(m, ",anongid=%u", from_kgid_munged(userns, anong)); if (fsloc && fsloc->locations_count > 0) { char *loctype = (fsloc->migrated) ? "refer" : "replicas"; int i; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 32cb8c027483..789abc4dd1d2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -104,6 +104,9 @@ struct nfsd_net { time_t nfsd4_grace; bool somebody_reclaimed; + bool track_reclaim_completes; + atomic_t nr_reclaim_complete; + bool nfsd_net_up; bool lockd_up; @@ -131,10 +134,18 @@ struct nfsd_net { u32 s2s_cp_cl_id; struct idr s2s_cp_stateids; spinlock_t s2s_cp_lock; + + /* + * Version information + */ + bool *nfsd_versions; + bool *nfsd4_minorversions; }; /* Simple check to find out if a given net was properly initialized */ #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) +extern void nfsd_netns_free_versions(struct nfsd_net *nn); + extern unsigned int nfsd_net_id; #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 8d789124ed3c..fcf31822c74c 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -96,7 +96,7 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp) } static __be32 * -decode_sattr3(__be32 *p, struct iattr *iap) +decode_sattr3(__be32 *p, struct iattr *iap, struct user_namespace *userns) { u32 tmp; @@ -107,12 +107,12 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_mode = ntohl(*p++); } if (*p++) { - iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++)); + iap->ia_uid = make_kuid(userns, ntohl(*p++)); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } if (*p++) { - iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++)); + iap->ia_gid = make_kgid(userns, ntohl(*p++)); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } @@ -165,12 +165,13 @@ static __be32 * encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { + struct user_namespace *userns = nfsd_user_namespace(rqstp); struct timespec ts; *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); + *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); + *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { @@ -325,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) p = decode_fh(p, &args->fh); if (!p) return 0; - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); if ((args->check_guard = ntohl(*p++)) != 0) { struct timespec time; @@ -455,7 +456,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) switch (args->createmode = ntohl(*p++)) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); break; case NFS3_CREATE_EXCLUSIVE: args->verf = p; @@ -476,7 +477,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) if (!(p = decode_fh(p, &args->fh)) || !(p = decode_filename(p, &args->name, &args->len))) return 0; - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); return xdr_argsize_check(rqstp, p); } @@ -491,7 +492,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) if (!(p = decode_fh(p, &args->ffh)) || !(p = decode_filename(p, &args->fname, &args->flen))) return 0; - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); args->tlen = ntohl(*p++); @@ -519,7 +520,7 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) if (args->ftype == NF3BLK || args->ftype == NF3CHR || args->ftype == NF3SOCK || args->ftype == NF3FIFO) - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); if (args->ftype == NF3BLK || args->ftype == NF3CHR) { args->major = ntohl(*p++); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 9b93e7a9a26d..397eb7820929 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1123,10 +1123,11 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); return; case 1: - break; - case -1: - /* Network partition? */ - nfsd4_mark_cb_down(clp, task->tk_status); + switch (task->tk_status) { + case -EIO: + case -ETIMEDOUT: + nfsd4_mark_cb_down(clp, task->tk_status); + } break; default: BUG(); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index bf137fec33ff..2961016097ac 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -634,7 +634,7 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, return nfserr_inval; status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id); - *uid = make_kuid(&init_user_ns, id); + *uid = make_kuid(nfsd_user_namespace(rqstp), id); if (!uid_valid(*uid)) status = nfserr_badowner; return status; @@ -651,7 +651,7 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, return nfserr_inval; status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id); - *gid = make_kgid(&init_user_ns, id); + *gid = make_kgid(nfsd_user_namespace(rqstp), id); if (!gid_valid(*gid)) status = nfserr_badowner; return status; @@ -660,13 +660,13 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, __be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, kuid_t uid) { - u32 id = from_kuid(&init_user_ns, uid); + u32 id = from_kuid_munged(nfsd_user_namespace(rqstp), uid); return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id); } __be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, kgid_t gid) { - u32 id = from_kgid(&init_user_ns, gid); + u32 id = from_kgid_munged(nfsd_user_namespace(rqstp), gid); return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id); } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 44517fb5c0de..a79e24b79095 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -693,7 +693,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ops->fence_client(ls); else nfsd4_cb_layout_fail(ls); - return -1; + return 1; case -NFS4ERR_NOMATCHING_LAYOUT: trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid); task->tk_status = 0; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4680ad3bf55b..8beda999e134 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1927,6 +1927,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); __be32 status; svcxdr_init_encode(rqstp, resp); @@ -1949,7 +1950,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) + if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0) goto out; status = nfserr_resource; if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 8c8563441208..87679557d0d6 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -169,12 +169,33 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error) } static void +__nfsd4_create_reclaim_record_grace(struct nfs4_client *clp, + const char *dname, int len, struct nfsd_net *nn) +{ + struct xdr_netobj name; + struct nfs4_client_reclaim *crp; + + name.data = kmemdup(dname, len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + return; + } + name.len = len; + crp = nfs4_client_to_reclaim(name, nn); + if (!crp) { + kfree(name.data); + return; + } + crp->cr_clp = clp; +} + +static void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char dname[HEXDIR_LEN]; struct dentry *dir, *dentry; - struct nfs4_client_reclaim *crp; int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -220,11 +241,9 @@ out_put: out_unlock: inode_unlock(d_inode(dir)); if (status == 0) { - if (nn->in_grace) { - crp = nfs4_client_to_reclaim(dname, nn); - if (crp) - crp->cr_clp = clp; - } + if (nn->in_grace) + __nfsd4_create_reclaim_record_grace(clp, dname, + HEXDIR_LEN, nn); vfs_fsync(nn->rec_file, 0); } else { printk(KERN_ERR "NFSD: failed to write recovery record" @@ -345,10 +364,29 @@ out_unlock: } static void +__nfsd4_remove_reclaim_record_grace(const char *dname, int len, + struct nfsd_net *nn) +{ + struct xdr_netobj name; + struct nfs4_client_reclaim *crp; + + name.data = kmemdup(dname, len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + return; + } + name.len = len; + crp = nfsd4_find_reclaim_client(name, nn); + kfree(name.data); + if (crp) + nfs4_remove_reclaim_record(crp, nn); +} + +static void nfsd4_remove_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; - struct nfs4_client_reclaim *crp; char dname[HEXDIR_LEN]; int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -373,12 +411,9 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) nfs4_reset_creds(original_cred); if (status == 0) { vfs_fsync(nn->rec_file, 0); - if (nn->in_grace) { - /* remove reclaim record */ - crp = nfsd4_find_reclaim_client(dname, nn); - if (crp) - nfs4_remove_reclaim_record(crp, nn); - } + if (nn->in_grace) + __nfsd4_remove_reclaim_record_grace(dname, + HEXDIR_LEN, nn); } out_drop_write: mnt_drop_write_file(nn->rec_file); @@ -392,14 +427,31 @@ static int purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { int status; + struct xdr_netobj name; - if (nfs4_has_reclaimed_state(child->d_name.name, nn)) + if (child->d_name.len != HEXDIR_LEN - 1) { + printk("%s: illegal name %pd in recovery directory\n", + __func__, child); + /* Keep trying; maybe the others are OK: */ return 0; + } + name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + goto out; + } + name.len = HEXDIR_LEN; + if (nfs4_has_reclaimed_state(name, nn)) + goto out_free; status = vfs_rmdir(d_inode(parent), child); if (status) printk("failed to remove client recovery directory %pd\n", child); +out_free: + kfree(name.data); +out: /* Keep trying, success or failure: */ return 0; } @@ -429,13 +481,24 @@ out: static int load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { + struct xdr_netobj name; + if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %pd in recovery directory\n", - child); + printk("%s: illegal name %pd in recovery directory\n", + __func__, child); /* Keep trying; maybe the others are OK: */ return 0; } - nfs4_client_to_reclaim(child->d_name.name, nn); + name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + goto out; + } + name.len = HEXDIR_LEN; + if (!nfs4_client_to_reclaim(name, nn)) + kfree(name.data); +out: return 0; } @@ -564,6 +627,7 @@ nfsd4_legacy_tracking_init(struct net *net) status = nfsd4_load_reboot_recovery_data(net); if (status) goto err; + printk("NFSD: Using legacy client tracking operations.\n"); return 0; err: @@ -615,6 +679,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) char dname[HEXDIR_LEN]; struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct xdr_netobj name; /* did we already find that this client is stable? */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) @@ -627,13 +692,22 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) } /* look for it in the reclaim hashtable otherwise */ - crp = nfsd4_find_reclaim_client(dname, nn); + name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + goto out_enoent; + } + name.len = HEXDIR_LEN; + crp = nfsd4_find_reclaim_client(name, nn); + kfree(name.data); if (crp) { set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); crp->cr_clp = clp; return 0; } +out_enoent: return -ENOENT; } @@ -656,6 +730,7 @@ struct cld_net { spinlock_t cn_lock; struct list_head cn_list; unsigned int cn_xid; + bool cn_has_legacy; }; struct cld_upcall { @@ -706,6 +781,40 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) } static ssize_t +__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg, + struct nfsd_net *nn) +{ + uint8_t cmd; + struct xdr_netobj name; + uint16_t namelen; + struct cld_net *cn = nn->cld_net; + + if (get_user(cmd, &cmsg->cm_cmd)) { + dprintk("%s: error when copying cmd from userspace", __func__); + return -EFAULT; + } + if (cmd == Cld_GraceStart) { + if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len)) + return -EFAULT; + name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen); + if (IS_ERR_OR_NULL(name.data)) + return -EFAULT; + name.len = namelen; + if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { + name.len = name.len - 5; + memmove(name.data, name.data + 5, name.len); + cn->cn_has_legacy = true; + } + if (!nfs4_client_to_reclaim(name, nn)) { + kfree(name.data); + return -EFAULT; + } + return sizeof(*cmsg); + } + return -EFAULT; +} + +static ssize_t cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct cld_upcall *tmp, *cup; @@ -714,6 +823,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info, nfsd_net_id); struct cld_net *cn = nn->cld_net; + int16_t status; if (mlen != sizeof(*cmsg)) { dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen, @@ -727,13 +837,24 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) return -EFAULT; } + /* + * copy the status so we know whether to remove the upcall from the + * list (for -EINPROGRESS, we just want to make sure the xid is + * valid, not remove the upcall from the list) + */ + if (get_user(status, &cmsg->cm_status)) { + dprintk("%s: error when copying status from userspace", __func__); + return -EFAULT; + } + /* walk the list and find corresponding xid */ cup = NULL; spin_lock(&cn->cn_lock); list_for_each_entry(tmp, &cn->cn_list, cu_list) { if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) { cup = tmp; - list_del_init(&cup->cu_list); + if (status != -EINPROGRESS) + list_del_init(&cup->cu_list); break; } } @@ -745,6 +866,9 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) return -EINVAL; } + if (status == -EINPROGRESS) + return __cld_pipe_inprogress_downcall(cmsg, nn); + if (copy_from_user(&cup->cu_msg, src, mlen) != 0) return -EFAULT; @@ -820,7 +944,7 @@ nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe) /* Initialize rpc_pipefs pipe for communication with client tracking daemon */ static int -nfsd4_init_cld_pipe(struct net *net) +__nfsd4_init_cld_pipe(struct net *net) { int ret; struct dentry *dentry; @@ -851,6 +975,7 @@ nfsd4_init_cld_pipe(struct net *net) } cn->cn_pipe->dentry = dentry; + cn->cn_has_legacy = false; nn->cld_net = cn; return 0; @@ -863,6 +988,17 @@ err: return ret; } +static int +nfsd4_init_cld_pipe(struct net *net) +{ + int status; + + status = __nfsd4_init_cld_pipe(net); + if (!status) + printk("NFSD: Using old nfsdcld client tracking operations.\n"); + return status; +} + static void nfsd4_remove_cld_pipe(struct net *net) { @@ -991,9 +1127,14 @@ out_err: "record from stable storage: %d\n", ret); } -/* Check for presence of a record, and update its timestamp */ +/* + * For older nfsdcld's that do not allow us to "slurp" the clients + * from the tracking database during startup. + * + * Check for presence of a record, and update its timestamp + */ static int -nfsd4_cld_check(struct nfs4_client *clp) +nfsd4_cld_check_v0(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; @@ -1026,8 +1167,84 @@ nfsd4_cld_check(struct nfs4_client *clp) return ret; } +/* + * For newer nfsdcld's that allow us to "slurp" the clients + * from the tracking database during startup. + * + * Check for presence of a record in the reclaim_str_hashtbl + */ +static int +nfsd4_cld_check(struct nfs4_client *clp) +{ + struct nfs4_client_reclaim *crp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + int status; + char dname[HEXDIR_LEN]; + struct xdr_netobj name; + + /* did we already find that this client is stable? */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + /* look for it in the reclaim hashtable otherwise */ + crp = nfsd4_find_reclaim_client(clp->cl_name, nn); + if (crp) + goto found; + + if (cn->cn_has_legacy) { + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return -ENOENT; + + name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + return -ENOENT; + } + name.len = HEXDIR_LEN; + crp = nfsd4_find_reclaim_client(name, nn); + kfree(name.data); + if (crp) + goto found; + + } + return -ENOENT; +found: + crp->cr_clp = clp; + return 0; +} + +static int +nfsd4_cld_grace_start(struct nfsd_net *nn) +{ + int ret; + struct cld_upcall *cup; + struct cld_net *cn = nn->cld_net; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_GraceStart; + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) + ret = cup->cu_msg.cm_status; + + free_cld_upcall(cup); +out_err: + if (ret) + dprintk("%s: Unable to get clients from userspace: %d\n", + __func__, ret); + return ret; +} + +/* For older nfsdcld's that need cm_gracetime */ static void -nfsd4_cld_grace_done(struct nfsd_net *nn) +nfsd4_cld_grace_done_v0(struct nfsd_net *nn) { int ret; struct cld_upcall *cup; @@ -1051,11 +1268,149 @@ out_err: printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); } -static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { +/* + * For newer nfsdcld's that do not need cm_gracetime. We also need to call + * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl. + */ +static void +nfsd4_cld_grace_done(struct nfsd_net *nn) +{ + int ret; + struct cld_upcall *cup; + struct cld_net *cn = nn->cld_net; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_GraceDone; + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) + ret = cup->cu_msg.cm_status; + + free_cld_upcall(cup); +out_err: + nfs4_release_reclaim(nn); + if (ret) + printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); +} + +static int +nfs4_cld_state_init(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, + sizeof(struct list_head), + GFP_KERNEL); + if (!nn->reclaim_str_hashtbl) + return -ENOMEM; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); + nn->reclaim_str_hashtbl_size = 0; + nn->track_reclaim_completes = true; + atomic_set(&nn->nr_reclaim_complete, 0); + + return 0; +} + +static void +nfs4_cld_state_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nn->track_reclaim_completes = false; + kfree(nn->reclaim_str_hashtbl); +} + +static bool +cld_running(struct nfsd_net *nn) +{ + struct cld_net *cn = nn->cld_net; + struct rpc_pipe *pipe = cn->cn_pipe; + + return pipe->nreaders || pipe->nwriters; +} + +static int +nfsd4_cld_tracking_init(struct net *net) +{ + int status; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + bool running; + int retries = 10; + + status = nfs4_cld_state_init(net); + if (status) + return status; + + status = __nfsd4_init_cld_pipe(net); + if (status) + goto err_shutdown; + + /* + * rpc pipe upcalls take 30 seconds to time out, so we don't want to + * queue an upcall unless we know that nfsdcld is running (because we + * want this to fail fast so that nfsd4_client_tracking_init() can try + * the next client tracking method). nfsdcld should already be running + * before nfsd is started, so the wait here is for nfsdcld to open the + * pipefs file we just created. + */ + while (!(running = cld_running(nn)) && retries--) + msleep(100); + + if (!running) { + status = -ETIMEDOUT; + goto err_remove; + } + + status = nfsd4_cld_grace_start(nn); + if (status) { + if (status == -EOPNOTSUPP) + printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n"); + nfs4_release_reclaim(nn); + goto err_remove; + } else + printk("NFSD: Using nfsdcld client tracking operations.\n"); + return 0; + +err_remove: + nfsd4_remove_cld_pipe(net); +err_shutdown: + nfs4_cld_state_shutdown(net); + return status; +} + +static void +nfsd4_cld_tracking_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_release_reclaim(nn); + nfsd4_remove_cld_pipe(net); + nfs4_cld_state_shutdown(net); +} + +/* For older nfsdcld's */ +static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = { .init = nfsd4_init_cld_pipe, .exit = nfsd4_remove_cld_pipe, .create = nfsd4_cld_create, .remove = nfsd4_cld_remove, + .check = nfsd4_cld_check_v0, + .grace_done = nfsd4_cld_grace_done_v0, +}; + +/* For newer nfsdcld's */ +static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { + .init = nfsd4_cld_tracking_init, + .exit = nfsd4_cld_tracking_exit, + .create = nfsd4_cld_create, + .remove = nfsd4_cld_remove, .check = nfsd4_cld_check, .grace_done = nfsd4_cld_grace_done, }; @@ -1267,6 +1622,8 @@ nfsd4_umh_cltrack_init(struct net *net) ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); kfree(grace_start); + if (!ret) + printk("NFSD: Using UMH upcall client tracking operations.\n"); return ret; } @@ -1416,9 +1773,20 @@ nfsd4_client_tracking_init(struct net *net) if (nn->client_tracking_ops) goto do_init; + /* First, try to use nfsdcld */ + nn->client_tracking_ops = &nfsd4_cld_tracking_ops; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + if (status != -ETIMEDOUT) { + nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + } + /* - * First, try a UMH upcall. It should succeed or fail quickly, so - * there's little harm in trying that first. + * Next, try the UMH upcall. */ nn->client_tracking_ops = &nfsd4_umh_tracking_ops; status = nn->client_tracking_ops->init(net); @@ -1426,25 +1794,23 @@ nfsd4_client_tracking_init(struct net *net) return status; /* - * See if the recoverydir exists and is a directory. If it is, - * then use the legacy ops. + * Finally, See if the recoverydir exists and is a directory. + * If it is, then use the legacy ops. */ nn->client_tracking_ops = &nfsd4_legacy_tracking_ops; status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); if (!status) { status = d_is_dir(path.dentry); path_put(&path); - if (status) - goto do_init; + if (!status) { + status = -EINVAL; + goto out; + } } - /* Finally, try to use nfsdcld */ - nn->client_tracking_ops = &nfsd4_cld_tracking_ops; - printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be " - "removed in 3.10. Please transition to using " - "nfsdcltrack.\n"); do_init: status = nn->client_tracking_ops->init(net); +out: if (status) { printk(KERN_WARNING "NFSD: Unable to initialize client " "recovery tracking! (%d)\n", status); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index eca4a23f93c8..618e66078ee5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -77,6 +77,7 @@ static u64 current_sessionid = 1; /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); static void nfs4_free_ol_stateid(struct nfs4_stid *stid); +void nfsd4_end_grace(struct nfsd_net *nn); /* Locking: */ @@ -1067,9 +1068,9 @@ static unsigned int clientid_hashval(u32 id) return id & CLIENT_HASH_MASK; } -static unsigned int clientstr_hashval(const char *name) +static unsigned int clientstr_hashval(struct xdr_netobj name) { - return opaque_hashval(name, 8) & CLIENT_HASH_MASK; + return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK; } /* @@ -1997,6 +1998,22 @@ destroy_client(struct nfs4_client *clp) __destroy_client(clp); } +static void inc_reclaim_complete(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (!nn->track_reclaim_completes) + return; + if (!nfsd4_find_reclaim_client(clp->cl_name, nn)) + return; + if (atomic_inc_return(&nn->nr_reclaim_complete) == + nn->reclaim_str_hashtbl_size) { + printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n", + clp->net->ns.inum); + nfsd4_end_grace(nn); + } +} + static void expire_client(struct nfs4_client *clp) { unhash_client(clp); @@ -2048,11 +2065,6 @@ compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) return memcmp(o1->data, o2->data, o1->len); } -static int same_name(const char *n1, const char *n2) -{ - return 0 == memcmp(n1, n2, HEXDIR_LEN); -} - static int same_verf(nfs4_verifier *v1, nfs4_verifier *v2) { @@ -3354,6 +3366,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, status = nfs_ok; nfsd4_client_record_create(cstate->session->se_client); + inc_reclaim_complete(cstate->session->se_client); out: return status; } @@ -3958,6 +3971,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, switch (task->tk_status) { case 0: return 1; + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; case -EBADHANDLE: case -NFS4ERR_BAD_STATEID: /* @@ -3970,7 +3986,7 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, } /*FALLTHRU*/ default: - return -1; + return 1; } } @@ -4713,7 +4729,6 @@ nfsd4_end_grace(struct nfsd_net *nn) if (nn->grace_ended) return; - dprintk("NFSD: end of grace period\n"); nn->grace_ended = true; /* * If the server goes down again right now, an NFSv4 @@ -4749,6 +4764,10 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) unsigned long double_grace_period_end = nn->boot_time + 2 * nn->nfsd4_lease; + if (nn->track_reclaim_completes && + atomic_read(&nn->nr_reclaim_complete) == + nn->reclaim_str_hashtbl_size) + return false; if (!nn->somebody_reclaimed) return false; nn->somebody_reclaimed = false; @@ -4779,6 +4798,7 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = 0; goto out; } + dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); spin_lock(&nn->client_lock); @@ -6458,7 +6478,7 @@ alloc_reclaim(void) } bool -nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) +nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client_reclaim *crp; @@ -6468,20 +6488,24 @@ nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) /* * failure => all reset bets are off, nfserr_no_grace... + * + * The caller is responsible for freeing name.data if NULL is returned (it + * will be freed in nfs4_remove_reclaim_record in the normal case). */ struct nfs4_client_reclaim * -nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn) +nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp; - dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); + dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data); crp = alloc_reclaim(); if (crp) { strhashval = clientstr_hashval(name); INIT_LIST_HEAD(&crp->cr_strhash); list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_recdir, name, HEXDIR_LEN); + crp->cr_name.data = name.data; + crp->cr_name.len = name.len; crp->cr_clp = NULL; nn->reclaim_str_hashtbl_size++; } @@ -6492,6 +6516,7 @@ void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn) { list_del(&crp->cr_strhash); + kfree(crp->cr_name.data); kfree(crp); nn->reclaim_str_hashtbl_size--; } @@ -6515,16 +6540,16 @@ nfs4_release_reclaim(struct nfsd_net *nn) /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ struct nfs4_client_reclaim * -nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) +nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir); + dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data); - strhashval = clientstr_hashval(recdir); + strhashval = clientstr_hashval(name); list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { - if (same_name(crp->cr_recdir, recdir)) { + if (compare_blob(&crp->cr_name, &name) == 0) { return crp; } } @@ -7262,10 +7287,19 @@ nfs4_state_start_net(struct net *net) return ret; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); + if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) + goto skip_grace; printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; + +skip_grace: + printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n", + net->ns.inum); + queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ); + nfsd4_end_grace(nn); + return 0; } /* initialization to perform when the nfsd service is started: */ diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3de42a729093..52c4f6daa649 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -521,6 +521,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) { DECODE_HEAD; + struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); u32 dummy, uid, gid; char *machine_name; int i; @@ -563,8 +564,8 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); if (cbs->flavor == (u32)(-1)) { - kuid_t kuid = make_kuid(&init_user_ns, uid); - kgid_t kgid = make_kgid(&init_user_ns, gid); + kuid_t kuid = make_kuid(userns, uid); + kgid_t kgid = make_kgid(userns, gid); if (uid_valid(kuid) && gid_valid(kgid)) { cbs->uid = kuid; cbs->gid = kgid; @@ -2420,8 +2421,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, __be32 status; int err; struct nfs4_acl *acl = NULL; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL void *context = NULL; int contextlen; +#endif bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; @@ -2906,12 +2909,14 @@ out_acl: *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA); } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, contextlen); if (status) goto out; } +#endif attrlen = htonl(xdr->buf->len - attrlen_offset - 4); write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f2feb2d11bae..90972e1fd785 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -439,7 +439,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return rv; if (newthreads < 0) return -EINVAL; - rv = nfsd_svc(newthreads, net); + rv = nfsd_svc(newthreads, net, file->f_cred); if (rv < 0) return rv; } else @@ -537,14 +537,14 @@ out_free: } static ssize_t -nfsd_print_version_support(char *buf, int remaining, const char *sep, - unsigned vers, int minor) +nfsd_print_version_support(struct nfsd_net *nn, char *buf, int remaining, + const char *sep, unsigned vers, int minor) { const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u"; - bool supported = !!nfsd_vers(vers, NFSD_TEST); + bool supported = !!nfsd_vers(nn, vers, NFSD_TEST); if (vers == 4 && minor >= 0 && - !nfsd_minorversion(minor, NFSD_TEST)) + !nfsd_minorversion(nn, minor, NFSD_TEST)) supported = false; if (minor == 0 && supported) /* @@ -599,20 +599,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) switch(num) { case 2: case 3: - nfsd_vers(num, cmd); + nfsd_vers(nn, num, cmd); break; case 4: if (*minorp == '.') { - if (nfsd_minorversion(minor, cmd) < 0) + if (nfsd_minorversion(nn, minor, cmd) < 0) return -EINVAL; - } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) { + } else if ((cmd == NFSD_SET) != nfsd_vers(nn, num, NFSD_TEST)) { /* * Either we have +4 and no minors are enabled, * or we have -4 and at least one minor is enabled. * In either case, propagate 'cmd' to all minors. */ minor = 0; - while (nfsd_minorversion(minor, cmd) >= 0) + while (nfsd_minorversion(nn, minor, cmd) >= 0) minor++; } break; @@ -624,7 +624,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) /* If all get turned off, turn them back on, as * having no versions is BAD */ - nfsd_reset_versions(); + nfsd_reset_versions(nn); } /* Now write current state into reply buffer */ @@ -633,12 +633,12 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { int minor; - if (!nfsd_vers(num, NFSD_AVAIL)) + if (!nfsd_vers(nn, num, NFSD_AVAIL)) continue; minor = -1; do { - len = nfsd_print_version_support(buf, remaining, + len = nfsd_print_version_support(nn, buf, remaining, sep, num, minor); if (len >= remaining) goto out; @@ -717,7 +717,7 @@ static ssize_t __write_ports_names(char *buf, struct net *net) * a socket of a supported family/protocol, and we use it as an * nfsd listener. */ -static ssize_t __write_ports_addfd(char *buf, struct net *net) +static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred) { char *mesg = buf; int fd, err; @@ -736,7 +736,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) if (err != 0) return err; - err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); if (err < 0) { nfsd_destroy(net); return err; @@ -751,7 +751,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) * A transport listener is added by writing it's transport name and * a port number. */ -static ssize_t __write_ports_addxprt(char *buf, struct net *net) +static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred) { char transport[16]; struct svc_xprt *xprt; @@ -769,12 +769,12 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net) return err; err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET, port, SVC_SOCK_ANONYMOUS); + PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET6, port, SVC_SOCK_ANONYMOUS); + PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; @@ -799,10 +799,10 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, return __write_ports_names(buf, net); if (isdigit(buf[0])) - return __write_ports_addfd(buf, net); + return __write_ports_addfd(buf, net, file->f_cred); if (isalpha(buf[0])) - return __write_ports_addxprt(buf, net); + return __write_ports_addxprt(buf, net, file->f_cred); return -EINVAL; } @@ -1239,9 +1239,12 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; + nn->nfsd_versions = NULL; + nn->nfsd4_minorversions = NULL; nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; + nn->track_reclaim_completes = false; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); nn->s2s_cp_cl_id = nn->clientid_counter++; @@ -1260,6 +1263,7 @@ static __net_exit void nfsd_exit_net(struct net *net) { nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); + nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 066899929863..24187b5dd638 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -17,6 +17,7 @@ #include <linux/nfs3.h> #include <linux/nfs4.h> #include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/msg_prot.h> #include <uapi/linux/nfsd/debug.h> @@ -73,7 +74,7 @@ extern const struct seq_operations nfs_exports_op; /* * Function prototypes. */ -int nfsd_svc(int nrservs, struct net *net); +int nfsd_svc(int nrservs, struct net *net, const struct cred *cred); int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); int nfsd_nrthreads(struct net *); @@ -98,10 +99,12 @@ extern const struct svc_version nfsd_acl_version3; #endif #endif +struct nfsd_net; + enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; -int nfsd_vers(int vers, enum vers_op change); -int nfsd_minorversion(u32 minorversion, enum vers_op change); -void nfsd_reset_versions(void); +int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change); +int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); +void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); extern int nfsd_max_blksize; @@ -110,6 +113,12 @@ static inline int nfsd_v4client(struct svc_rqst *rq) { return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; } +static inline struct user_namespace * +nfsd_user_namespace(const struct svc_rqst *rqstp) +{ + const struct cred *cred = rqstp->rq_xprt->xpt_cred; + return cred ? cred->user_ns : &init_user_ns; +} /* * NFSv4 State diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 89cb484f1cfb..18d94ea984ba 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -32,6 +32,24 @@ extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static int nfsd_acl_rpcbind_set(struct net *, + const struct svc_program *, + u32, int, + unsigned short, + unsigned short); +static __be32 nfsd_acl_init_request(struct svc_rqst *, + const struct svc_program *, + struct svc_process_info *); +#endif +static int nfsd_rpcbind_set(struct net *, + const struct svc_program *, + u32, int, + unsigned short, + unsigned short); +static __be32 nfsd_init_request(struct svc_rqst *, + const struct svc_program *, + struct svc_process_info *); /* * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members @@ -86,6 +104,8 @@ static struct svc_program nfsd_acl_program = { .pg_class = "nfsd", .pg_stats = &nfsd_acl_svcstats, .pg_authenticate = &svc_set_client, + .pg_init_request = nfsd_acl_init_request, + .pg_rpcbind_set = nfsd_acl_rpcbind_set, }; static struct svc_stat nfsd_acl_svcstats = { @@ -105,7 +125,6 @@ static const struct svc_version *nfsd_version[] = { #define NFSD_MINVERS 2 #define NFSD_NRVERS ARRAY_SIZE(nfsd_version) -static const struct svc_version *nfsd_versions[NFSD_NRVERS]; struct svc_program nfsd_program = { #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) @@ -113,77 +132,136 @@ struct svc_program nfsd_program = { #endif .pg_prog = NFS_PROGRAM, /* program number */ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ - .pg_vers = nfsd_versions, /* version table */ + .pg_vers = nfsd_version, /* version table */ .pg_name = "nfsd", /* program name */ .pg_class = "nfsd", /* authentication class */ .pg_stats = &nfsd_svcstats, /* version table */ .pg_authenticate = &svc_set_client, /* export authentication */ - + .pg_init_request = nfsd_init_request, + .pg_rpcbind_set = nfsd_rpcbind_set, }; -static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { - [0] = 1, - [1] = 1, - [2] = 1, -}; +static bool +nfsd_support_version(int vers) +{ + if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS) + return nfsd_version[vers] != NULL; + return false; +} + +static bool * +nfsd_alloc_versions(void) +{ + bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL); + unsigned i; + + if (vers) { + /* All compiled versions are enabled by default */ + for (i = 0; i < NFSD_NRVERS; i++) + vers[i] = nfsd_support_version(i); + } + return vers; +} + +static bool * +nfsd_alloc_minorversions(void) +{ + bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1, + sizeof(bool), GFP_KERNEL); + unsigned i; + + if (vers) { + /* All minor versions are enabled by default */ + for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) + vers[i] = nfsd_support_version(4); + } + return vers; +} -int nfsd_vers(int vers, enum vers_op change) +void +nfsd_netns_free_versions(struct nfsd_net *nn) +{ + kfree(nn->nfsd_versions); + kfree(nn->nfsd4_minorversions); + nn->nfsd_versions = NULL; + nn->nfsd4_minorversions = NULL; +} + +static void +nfsd_netns_init_versions(struct nfsd_net *nn) +{ + if (!nn->nfsd_versions) { + nn->nfsd_versions = nfsd_alloc_versions(); + nn->nfsd4_minorversions = nfsd_alloc_minorversions(); + if (!nn->nfsd_versions || !nn->nfsd4_minorversions) + nfsd_netns_free_versions(nn); + } +} + +int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) return 0; switch(change) { case NFSD_SET: - nfsd_versions[vers] = nfsd_version[vers]; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - if (vers < NFSD_ACL_NRVERS) - nfsd_acl_versions[vers] = nfsd_acl_version[vers]; -#endif + if (nn->nfsd_versions) + nn->nfsd_versions[vers] = nfsd_support_version(vers); break; case NFSD_CLEAR: - nfsd_versions[vers] = NULL; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - if (vers < NFSD_ACL_NRVERS) - nfsd_acl_versions[vers] = NULL; -#endif + nfsd_netns_init_versions(nn); + if (nn->nfsd_versions) + nn->nfsd_versions[vers] = false; break; case NFSD_TEST: - return nfsd_versions[vers] != NULL; + if (nn->nfsd_versions) + return nn->nfsd_versions[vers]; + /* Fallthrough */ case NFSD_AVAIL: - return nfsd_version[vers] != NULL; + return nfsd_support_version(vers); } return 0; } static void -nfsd_adjust_nfsd_versions4(void) +nfsd_adjust_nfsd_versions4(struct nfsd_net *nn) { unsigned i; for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) { - if (nfsd_supported_minorversions[i]) + if (nn->nfsd4_minorversions[i]) return; } - nfsd_vers(4, NFSD_CLEAR); + nfsd_vers(nn, 4, NFSD_CLEAR); } -int nfsd_minorversion(u32 minorversion, enum vers_op change) +int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change) { if (minorversion > NFSD_SUPPORTED_MINOR_VERSION && change != NFSD_AVAIL) return -1; + switch(change) { case NFSD_SET: - nfsd_supported_minorversions[minorversion] = true; - nfsd_vers(4, NFSD_SET); + if (nn->nfsd4_minorversions) { + nfsd_vers(nn, 4, NFSD_SET); + nn->nfsd4_minorversions[minorversion] = + nfsd_vers(nn, 4, NFSD_TEST); + } break; case NFSD_CLEAR: - nfsd_supported_minorversions[minorversion] = false; - nfsd_adjust_nfsd_versions4(); + nfsd_netns_init_versions(nn); + if (nn->nfsd4_minorversions) { + nn->nfsd4_minorversions[minorversion] = false; + nfsd_adjust_nfsd_versions4(nn); + } break; case NFSD_TEST: - return nfsd_supported_minorversions[minorversion]; + if (nn->nfsd4_minorversions) + return nn->nfsd4_minorversions[minorversion]; + return nfsd_vers(nn, 4, NFSD_TEST); case NFSD_AVAIL: - return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; + return minorversion <= NFSD_SUPPORTED_MINOR_VERSION && + nfsd_vers(nn, 4, NFSD_AVAIL); } return 0; } @@ -205,7 +283,7 @@ int nfsd_nrthreads(struct net *net) return rv; } -static int nfsd_init_socks(struct net *net) +static int nfsd_init_socks(struct net *net, const struct cred *cred) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -214,12 +292,12 @@ static int nfsd_init_socks(struct net *net) return 0; error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS); + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS); + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; @@ -265,16 +343,12 @@ static void nfsd_shutdown_generic(void) nfsd_racache_shutdown(); } -static bool nfsd_needs_lockd(void) +static bool nfsd_needs_lockd(struct nfsd_net *nn) { -#if defined(CONFIG_NFSD_V3) - return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL); -#else - return (nfsd_versions[2] != NULL); -#endif + return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } -static int nfsd_startup_net(int nrservs, struct net *net) +static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; @@ -285,12 +359,12 @@ static int nfsd_startup_net(int nrservs, struct net *net) ret = nfsd_startup_generic(nrservs); if (ret) return ret; - ret = nfsd_init_socks(net); + ret = nfsd_init_socks(net, cred); if (ret) goto out_socks; - if (nfsd_needs_lockd() && !nn->lockd_up) { - ret = lockd_up(net); + if (nfsd_needs_lockd(nn) && !nn->lockd_up) { + ret = lockd_up(net, cred); if (ret) goto out_socks; nn->lockd_up = 1; @@ -422,20 +496,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) nfsd_export_flush(net); } -void nfsd_reset_versions(void) +void nfsd_reset_versions(struct nfsd_net *nn) { int i; for (i = 0; i < NFSD_NRVERS; i++) - if (nfsd_vers(i, NFSD_TEST)) + if (nfsd_vers(nn, i, NFSD_TEST)) return; for (i = 0; i < NFSD_NRVERS; i++) if (i != 4) - nfsd_vers(i, NFSD_SET); + nfsd_vers(nn, i, NFSD_SET); else { int minor = 0; - while (nfsd_minorversion(minor, NFSD_SET) >= 0) + while (nfsd_minorversion(nn, minor, NFSD_SET) >= 0) minor++; } } @@ -503,7 +577,7 @@ int nfsd_create_serv(struct net *net) } if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); - nfsd_reset_versions(); + nfsd_reset_versions(nn); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) @@ -623,7 +697,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) * this is the first time nrservs is nonzero. */ int -nfsd_svc(int nrservs, struct net *net) +nfsd_svc(int nrservs, struct net *net, const struct cred *cred) { int error; bool nfsd_up_before; @@ -645,7 +719,7 @@ nfsd_svc(int nrservs, struct net *net) nfsd_up_before = nn->nfsd_net_up; - error = nfsd_startup_net(nrservs, net); + error = nfsd_startup_net(nrservs, net, cred); if (error) goto out_destroy; error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, @@ -667,6 +741,101 @@ out: return error; } +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static bool +nfsd_support_acl_version(int vers) +{ + if (vers >= NFSD_ACL_MINVERS && vers < NFSD_ACL_NRVERS) + return nfsd_acl_version[vers] != NULL; + return false; +} + +static int +nfsd_acl_rpcbind_set(struct net *net, const struct svc_program *progp, + u32 version, int family, unsigned short proto, + unsigned short port) +{ + if (!nfsd_support_acl_version(version) || + !nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST)) + return 0; + return svc_generic_rpcbind_set(net, progp, version, family, + proto, port); +} + +static __be32 +nfsd_acl_init_request(struct svc_rqst *rqstp, + const struct svc_program *progp, + struct svc_process_info *ret) +{ + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + int i; + + if (likely(nfsd_support_acl_version(rqstp->rq_vers) && + nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) + return svc_generic_init_request(rqstp, progp, ret); + + ret->mismatch.lovers = NFSD_ACL_NRVERS; + for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) { + if (nfsd_support_acl_version(rqstp->rq_vers) && + nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.lovers = i; + break; + } + } + if (ret->mismatch.lovers == NFSD_ACL_NRVERS) + return rpc_prog_unavail; + ret->mismatch.hivers = NFSD_ACL_MINVERS; + for (i = NFSD_ACL_NRVERS - 1; i >= NFSD_ACL_MINVERS; i--) { + if (nfsd_support_acl_version(rqstp->rq_vers) && + nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.hivers = i; + break; + } + } + return rpc_prog_mismatch; +} +#endif + +static int +nfsd_rpcbind_set(struct net *net, const struct svc_program *progp, + u32 version, int family, unsigned short proto, + unsigned short port) +{ + if (!nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST)) + return 0; + return svc_generic_rpcbind_set(net, progp, version, family, + proto, port); +} + +static __be32 +nfsd_init_request(struct svc_rqst *rqstp, + const struct svc_program *progp, + struct svc_process_info *ret) +{ + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + int i; + + if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) + return svc_generic_init_request(rqstp, progp, ret); + + ret->mismatch.lovers = NFSD_NRVERS; + for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { + if (nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.lovers = i; + break; + } + } + if (ret->mismatch.lovers == NFSD_NRVERS) + return rpc_prog_unavail; + ret->mismatch.hivers = NFSD_MINVERS; + for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) { + if (nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.hivers = i; + break; + } + } + return rpc_prog_mismatch; +} /* * This is the NFS server kernel thread diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6b2e8b73d36e..b51fe515f06f 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -71,7 +71,7 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp) } static __be32 * -decode_sattr(__be32 *p, struct iattr *iap) +decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns) { u32 tmp, tmp1; @@ -86,12 +86,12 @@ decode_sattr(__be32 *p, struct iattr *iap) iap->ia_mode = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_uid = make_kuid(&init_user_ns, tmp); + iap->ia_uid = make_kuid(userns, tmp); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_gid = make_kgid(&init_user_ns, tmp); + iap->ia_gid = make_kgid(userns, tmp); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } @@ -129,6 +129,7 @@ static __be32 * encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { + struct user_namespace *userns = nfsd_user_namespace(rqstp); struct dentry *dentry = fhp->fh_dentry; int type; struct timespec64 time; @@ -139,8 +140,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); + *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); + *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); @@ -216,7 +217,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) p = decode_fh(p, &args->fh); if (!p) return 0; - p = decode_sattr(p, &args->attrs); + p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); return xdr_argsize_check(rqstp, p); } @@ -319,7 +320,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) if ( !(p = decode_fh(p, &args->fh)) || !(p = decode_filename(p, &args->name, &args->len))) return 0; - p = decode_sattr(p, &args->attrs); + p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); return xdr_argsize_check(rqstp, p); } @@ -398,7 +399,7 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) return 0; p += xdrlen; } - decode_sattr(p, &args->attrs); + decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); return 1; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9d6cb246c6c5..0b74d371ed67 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -368,7 +368,7 @@ struct nfs4_client { struct nfs4_client_reclaim { struct list_head cr_strhash; /* hash by cr_name */ struct nfs4_client *cr_clp; /* pointer to associated clp */ - char cr_recdir[HEXDIR_LEN]; /* recover dir */ + struct xdr_netobj cr_name; /* recovery dir name */ }; /* A reasonable value for REPLAY_ISIZE was estimated as follows: @@ -620,7 +620,7 @@ void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); -extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, +extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); @@ -635,9 +635,9 @@ extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); -extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, +extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn); -extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); +extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); struct nfs4_file *find_file(struct knfsd_fh *fh); void put_nfs4_file(struct nfs4_file *fi); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7dc98e14655d..fc24ee47eab5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1786,12 +1786,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_nfserr; + goto out_drop_write; if (d_really_is_negative(rdentry)) { dput(rdentry); - err = nfserr_noent; - goto out; + host_err = -ENOENT; + goto out_drop_write; } if (!type) @@ -1805,6 +1805,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, host_err = commit_metadata(fhp); dput(rdentry); +out_drop_write: + fh_drop_write(fhp); out_nfserr: err = nfserrno(host_err); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a7e107309f76..db351247892d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -120,8 +120,11 @@ void nfsd_put_raparams(struct file *file, struct raparms *ra); static inline int fh_want_write(struct svc_fh *fh) { - int ret = mnt_want_write(fh->fh_export->ex_path.mnt); + int ret; + if (fh->fh_want_write) + return 0; + ret = mnt_want_write(fh->fh_export->ex_path.mnt); if (!ret) fh->fh_want_write = true; return ret; diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index ef2854422a6e..3d4041f0431e 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile @@ -1,7 +1,6 @@ -ccflags-y := -I$(src)/.. +ccflags-y := -I $(srctree)/$(src)/.. obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o - diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile index 33431a0296a3..0a0b93d940fe 100644 --- a/fs/ocfs2/dlmfs/Makefile +++ b/fs/ocfs2/dlmfs/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -I$(src)/.. +ccflags-y := -I $(srctree)/$(src)/.. obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o diff --git a/fs/unicode/README.utf8data b/fs/unicode/README.utf8data index 9307cf0727de..c73786807d3b 100644 --- a/fs/unicode/README.utf8data +++ b/fs/unicode/README.utf8data @@ -5,29 +5,15 @@ The full set of files can be found here: http://www.unicode.org/Public/12.1.0/ucd/ -Note! - -The URL's listed below are not stable. That's because Unicode 12.1.0 -has not been officially released yet; it is scheduled to be released -on May 8, 2019. We taking Unicode 12.1.0 a few weeks early because it -contains a new Japanese character which is required in order to -specify Japenese dates after May 1, 2019, when Crown Prince Naruhito -ascends to the Chrysanthemum Throne. (Isn't internationalization fun? -The abdication of Emperor Akihito of Japan is requiring dozens of -software packages to be updated with only a month's notice. :-) - -We will update the URL's (and any needed changes to the checksums) -after the final Unicode 12.1.0 is released. - Individual source links: - https://www.unicode.org/Public/12.1.0/ucd/CaseFolding-12.1.0d2.txt - https://www.unicode.org/Public/12.1.0/ucd/DerivedAge-12.1.0d3.txt - https://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedCombiningClass-12.1.0d2.txt - https://www.unicode.org/Public/12.1.0/ucd/DerivedCoreProperties-12.1.0d2.txt - https://www.unicode.org/Public/12.1.0/ucd/NormalizationCorrections-12.1.0d1.txt - https://www.unicode.org/Public/12.1.0/ucd/NormalizationTest-12.1.0d3.txt - https://www.unicode.org/Public/12.1.0/ucd/UnicodeData-12.1.0d2.txt + https://www.unicode.org/Public/12.1.0/ucd/CaseFolding.txt + https://www.unicode.org/Public/12.1.0/ucd/DerivedAge.txt + https://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedCombiningClass.txt + https://www.unicode.org/Public/12.1.0/ucd/DerivedCoreProperties.txt + https://www.unicode.org/Public/12.1.0/ucd/NormalizationCorrections.txt + https://www.unicode.org/Public/12.1.0/ucd/NormalizationTest.txt + https://www.unicode.org/Public/12.1.0/ucd/UnicodeData.txt md5sums (verify by running "md5sum -c README.utf8data"): diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c index 20d440c3f2db..801ed6d2ea37 100644 --- a/fs/unicode/utf8-norm.c +++ b/fs/unicode/utf8-norm.c @@ -714,6 +714,8 @@ int utf8byte(struct utf8cursor *u8c) } leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s); + if (!leaf) + return -1; ccc = LEAF_CCC(leaf); } diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 1dfc6df2e2bd..91831975363b 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -4,8 +4,8 @@ # All Rights Reserved. # -ccflags-y += -I$(src) # needed for trace events -ccflags-y += -I$(src)/libxfs +ccflags-y += -I $(srctree)/$(src) # needed for trace events +ccflags-y += -I $(srctree)/$(src)/libxfs ccflags-$(CONFIG_XFS_DEBUG) += -g |