From d2dcd9083f101584e029cbd4f0e1a4e573170d43 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Fri, 9 Mar 2012 06:27:12 +0530 Subject: logfs: destroy the reserved inodes while unmounting We were assuming that the evict_inode() would never be called on reserved inodes. However, (after the commit 8e22c1a4e logfs: get rid of magical inodes) while unmounting the file system, in put_super, we call iput() on all of the reserved inodes. The following simple test used to cause a kernel panic on LogFS: 1. Mount a LogFS file system on /mnt 2. Create a file $ touch /mnt/a 3. Try to unmount the FS $ umount /mnt The simple fix would be to drop the assumption and properly destroy the reserved inodes. Signed-off-by: Prasad Joshi --- fs/logfs/inode.c | 16 ++++++++++++++++ fs/logfs/readwrite.c | 1 - fs/logfs/segment.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index a422f42238b2..df093d9e4da1 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode) call_rcu(&inode->i_rcu, logfs_i_callback); } +static void __logfs_destroy_meta_inode(struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + BUG_ON(li->li_block); + call_rcu(&inode->i_rcu, logfs_i_callback); +} + static void logfs_destroy_inode(struct inode *inode) { struct logfs_inode *li = logfs_inode(inode); + if (inode->i_ino < LOGFS_RESERVED_INOS) { + /* + * The reserved inodes are never destroyed unless we are in + * unmont path. + */ + __logfs_destroy_meta_inode(inode); + return; + } + BUG_ON(list_empty(&li->li_freeing_list)); spin_lock(&logfs_inode_lock); li->li_refcount--; diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index e3ab5e5a904c..c8ea8664699c 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode) return; } - BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); page = inode_to_page(inode); BUG_ON(!page); /* FIXME: Use emergency page */ logfs_put_write_page(page); diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index e28d090c98d6..038da0991794 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb) static void map_invalidatepage(struct page *page, unsigned long l) { - BUG(); + return; } static int map_releasepage(struct page *page, gfp_t g) -- cgit v1.2.3 From cd8bfa9c8a13cf3facc5731da17e10188b3795d1 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 2 Apr 2012 09:23:04 +0530 Subject: logfs: initialize the number of iovecs in bio This fixes the following crash when a LogFS file system, created on a encrypted LVM volume, was mounted [ 526.548034] BUG: unable to handle kernel NULL pointer dereference at [ 526.550106] IP: [] memcpy+0xb/0x120 [ 526.551008] PGD bd60067 PUD 1778d067 PMD 0 [ 526.551783] Oops: 0000 [#1] SMP Pid: 2043, comm: mount RIP: 0010:[] [] memcpy+0xb/0x120 Call Trace: kcryptd_io_read+0xdb/0x100 crypt_map+0xfd/0x190 __map_bio+0x48/0x150 __split_and_process_bio+0x51b/0x630 dm_request+0x138/0x230 generic_make_request+0xca/0x100 submit_bio+0x87/0x110 sync_request+0xdd/0x120 [logfs] bdev_readpage+0x2e/0x70 [logfs] do_read_cache_page+0x82/0x180 logfs_mount+0x2ad/0x770 [logfs] mount_fs+0x47/0x1c0 vfs_kern_mount+0x72/0x110 do_kern_mount+0x54/0x110 do_mount+0x520/0x7f0 sys_mount+0x90/0xe0 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=42292 Reported-by: Witold Baryluk Signed-off-by: Prasad Joshi --- fs/logfs/dev_bdev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index df0de27c2733..ea29df36893d 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) struct completion complete; bio_init(&bio); + bio.bi_max_vecs = 1; bio.bi_io_vec = &bio_vec; bio_vec.bv_page = page; bio_vec.bv_len = PAGE_SIZE; -- cgit v1.2.3 From ddb24bbac3681b87ae0638aacb702d9273e3c025 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 09:18:14 +0530 Subject: logfs: create a pagecache page if it is not present While writing the partial journal entries we assumed that the page associated with the journal would always in locatable. This incorrect assumption resulted in the following BUG kernel BUG at /home/benixon/WD_SMR/kernels/linux-3.3.7-logfs/fs/logfs/journal.c:569! EIP is at logfs_write_area+0xb6/0x109 [logfs] EAX: 00000000 EBX: 00000000 ECX: ef6efea4 EDX: 00000000 ESI: 001b9000 EDI: f009e000 EBP: c3c13f14 ESP: c3c13ef0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process sync (pid: 1799, ti=c3c12000 task=f07825b0 task.ti=c3c12000) Stack: 01001000 c3c13f26 781b9000 00000000 f009e000 f7286000 f1f83400 f8445071 f1f83400 c3c13f30 f8445ae9 c3c13f20 0000100a 000ee000 f009e000 00000001 c3c13f5c f8445d17 c05eb0ee 00000000 f1f83400 ef718000 f009e25c ea9c3d80 Call Trace: [] ? account_shadow+0x16d/0x16d [logfs] [] logfs_write_je+0x2a/0x44 [logfs] [] logfs_write_anchor+0x114/0x228 [logfs] [] ? empty+0x5/0x5 [] logfs_sync_fs+0x1e/0x31 [logfs] [] __sync_filesystem+0x5d/0x6f [] sync_one_sb+0x15/0x17 [] iterate_supers+0x59/0x9a [] ? __sync_filesystem+0x6f/0x6f [] sys_sync+0x29/0x4f [] sysenter_do_call+0x12/0x28 EIP: [] logfs_write_area+0xb6/0x109 [logfs] SS:ESP 0068:c3c13ef0 ---[ end trace ef6e9ef52601a945 ]--- The fix is to create the pagecache page if it is not locatable. Reported-and-tested-by: Benixon Dhas Signed-off-by: Prasad Joshi --- fs/logfs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 1e1c369df22b..2a09b8d73989 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area, index = ofs >> PAGE_SHIFT; page_ofs = ofs & (PAGE_SIZE - 1); - page = find_lock_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); BUG_ON(!page); memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); unlock_page(page); -- cgit v1.2.3 From 41b93bc1ee7e7276db698dd66afa7b740cda517a Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 09:35:52 +0530 Subject: logfs: maintain the ordering of meta-inode destruction LogFS does not use a specialized area to maintain the inodes. The inodes information is kept in a specialized file called inode file. Similarly, the segment information is kept in a segment file. Since the segment file also has an inode which is kept in the inode file, the inode for segment file must be evicted before the inode for inode file. The change fixes the following BUG during unmount Pid: 2057, comm: umount Not tainted 3.5.0-rc6+ #25 Bochs Bochs RIP: 0010:[] [] move_page_to_btree+0x32/0x1f0 [logfs] Process umount (pid: 2057, threadinfo ...) Call Trace: [] ? find_get_pages+0x2a/0x180 [] logfs_invalidatepage+0x85/0x90 [logfs] [] truncate_inode_page+0xb1/0xd0 [] truncate_inode_pages_range+0x15f/0x490 [] ? printk+0x78/0x7a [] truncate_inode_pages+0x15/0x20 [] logfs_evict_inode+0x6c/0x190 [logfs] [] ? _raw_spin_unlock+0x2b/0x40 [] evict+0xa7/0x1b0 [] dispose_list+0x3e/0x60 [] evict_inodes+0xf4/0x110 [] generic_shutdown_super+0x53/0xf0 [] logfs_kill_sb+0x52/0xf0 [logfs] [] deactivate_locked_super+0x45/0x80 [] deactivate_super+0x4a/0x70 [] mntput_no_expire+0xde/0x140 [] sys_umount+0x6f/0x3a0 [] system_call_fastpath+0x16/0x1b ---[ end trace 45f7752082cefafd ]--- Signed-off-by: Prasad Joshi --- fs/logfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index df093d9e4da1..6984562738d3 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -389,8 +389,8 @@ static void logfs_put_super(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); /* kill the meta-inodes */ - iput(super->s_master_inode); iput(super->s_segfile_inode); + iput(super->s_master_inode); iput(super->s_mapping_inode); } -- cgit v1.2.3 From 9f0bbd8ca7905fcc0602c038013b095322fec939 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Mon, 23 Jul 2012 10:32:11 +0530 Subject: logfs: query block device for number of pages to send with bio The block device driver puts a limit on maximum number of pages that can be sent with the bio. Not all block devices can handle BIO_MAX_PAGES number of pages in bio. Specifically the virtio-blk diriver limits it to 126. When the LogFS file system was excersized in KVM, the following bug from do_virtblk_request() was observed static void do_virtblk_request(struct request_queue *q) { .... .... while ((req = blk_peek_request(q)) != NULL) { BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); .... .... } .... } The patch fixes the problem by querring the maximum number of pages in bio allowed from block device driver and then using those many pages during submit_bio. Signed-off-by: Prasad Joshi --- fs/logfs/dev_bdev.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index ea29df36893d..e784a217b500 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -96,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, struct address_space *mapping = super->s_mapping_inode->i_mapping; struct bio *bio; struct page *page; - struct request_queue *q = bdev_get_queue(sb->s_bdev); - unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + unsigned int max_pages; int i; - if (max_pages > BIO_MAX_PAGES) - max_pages = BIO_MAX_PAGES; + max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); + bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio); @@ -191,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, { struct logfs_super *super = logfs_super(sb); struct bio *bio; - struct request_queue *q = bdev_get_queue(sb->s_bdev); - unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + unsigned int max_pages; int i; - if (max_pages > BIO_MAX_PAGES) - max_pages = BIO_MAX_PAGES; + max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); + bio = bio_alloc(GFP_NOFS, max_pages); BUG_ON(!bio); -- cgit v1.2.3 From 53362a05ae683e12a20d9ffdf58a88094a0bed9d Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Thu, 2 Aug 2012 09:50:39 +0200 Subject: fs/block-dev.c:fix performance regression in O_DIRECT writes to md block devices For regular file, write operaion used blk_plug function.But for block file,write operation did not use blk_plug. This patch is also for write-cache mode for block-device. Signed-off-by: Jianpeng Ma Reviewed-by: NeilBrown Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e519195d45b..38e721b35d45 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); + blk_start_plug(&plug); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; @@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } + blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); -- cgit v1.2.3 From 3dd4765fce04c0b4af1e0bc4c0b10f906f95fabc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 2 Aug 2012 14:30:56 -0400 Subject: nfs: tear down caches in nfs_init_writepagecache when allocation fails ...and ensure that we tear down the nfs_commit_data cache too when unloading the module. Cc: Bryan Schumaker Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5829d0ce7cfb..e3b55372726c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void) nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) - return -ENOMEM; + goto out_destroy_write_cache; nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", sizeof(struct nfs_commit_data), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_cdata_cachep == NULL) - return -ENOMEM; + goto out_destroy_write_mempool; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, nfs_wdata_cachep); if (nfs_commit_mempool == NULL) - return -ENOMEM; + goto out_destroy_commit_cache; /* * NFS congestion size, scale with available memory. @@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void) nfs_congestion_kb = 256*1024; return 0; + +out_destroy_commit_cache: + kmem_cache_destroy(nfs_cdata_cachep); +out_destroy_write_mempool: + mempool_destroy(nfs_wdata_mempool); +out_destroy_write_cache: + kmem_cache_destroy(nfs_wdata_cachep); + return -ENOMEM; } void nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); + kmem_cache_destroy(nfs_cdata_cachep); mempool_destroy(nfs_wdata_mempool); kmem_cache_destroy(nfs_wdata_cachep); } -- cgit v1.2.3 From 8554116e17eef055d9dd58a94b3427cb2ad1c317 Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:10 +0300 Subject: pnfs: defer release of pages in layoutget we have encountered a bug whereby reading a lot of files (copying fedora's /bin) from a pNFS mount and hitting Ctrl+C in the middle caused a general protection fault in xdr_shrink_bufhead. this function is called when decoding the response from LAYOUTGET. the decoding is done by a worker thread, and the caller of LAYOUTGET waits for the worker thread to complete. hitting Ctrl+C caused the synchronous wait to end and the next thing the caller does is to free the pages, so when the worker thread calls xdr_shrink_bufhead, the pages are gone. therefore, the cleanup of these pages has been moved to nfs4_layoutget_release. Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.c | 39 +------------------------------------ fs/nfs/pnfs.h | 2 +- 3 files changed, 58 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a99a8d948721..6a78d49da5c1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6223,11 +6223,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } +static size_t max_response_pages(struct nfs_server *server) +{ + u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + return nfs_page_array_len(0, max_resp_sz); +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_free_pages(lgp->args.layout.pages, max_pages); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6239,9 +6286,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6259,6 +6307,13 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) dprintk("--> %s\n", __func__); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + nfs4_layoutget_release(lgp); + return -ENOMEM; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 76875bfcf19c..2e00feacd4be 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; struct pnfs_layout_segment *lseg = NULL; - struct page **pages = NULL; - int i; - u32 max_resp_sz, max_pages; dprintk("--> %s\n", __func__); @@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, if (lgp == NULL) return NULL; - /* allocate pages for xdr post processing */ - max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = nfs_page_array_len(0, max_resp_sz); - - pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); - if (!pages) - goto out_err_free; - - for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) - goto out_err_free; - } - lgp->args.minlength = PAGE_CACHE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; @@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->args.layout.pages = pages; - lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp); + nfs4_proc_layoutget(lgp, gfp_flags); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } - /* free xdr pages */ - for (i = 0; i < max_pages; i++) - __free_page(pages[i]); - kfree(pages); - return lseg; - -out_err_free: - /* free any allocated xdr pages, lgp as it's not used */ - if (pages) { - for (i = 0; i < max_pages; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); - } - kfree(lgp); - return NULL; } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2c6c80503ba4..5ea019e80b4c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ -- cgit v1.2.3 From 21d1f58aedc5f7ac4bb0c4e3d78c74ea31ac050f Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:11 +0300 Subject: pnfs: nfs4_proc_layoutget returns void since the only user of nfs4_proc_layoutget is send_layoutget, which ignores its return value, there is no reason to return any value. Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/pnfs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6a78d49da5c1..f94f6b3928fc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6286,7 +6286,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); @@ -6310,7 +6310,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); - return -ENOMEM; + return; } lgp->args.layout.pglen = max_pages * PAGE_SIZE; @@ -6319,7 +6319,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return PTR_ERR(task); + return; status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; @@ -6327,7 +6327,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) status = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return status; + return; } static void diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5ea019e80b4c..745aa1b39e7c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); +extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ -- cgit v1.2.3 From f6166384095b7ecf77752b5e9096e6d03d75f7ae Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 2 Aug 2012 15:36:09 +0300 Subject: NFS41: add pg_layout_private to nfs_pageio_descriptor To allow layout driver to pass private information around pg_init/pg_doio. Signed-off-by: Peng Tao Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ include/linux/nfs_page.h | 1 + include/linux/nfs_xdr.h | 1 + 3 files changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1a6732ed04a4..311a79681e2b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; hdr->dreq = desc->pg_dreq; + hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) @@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_error = 0; desc->pg_lseg = NULL; desc->pg_dreq = NULL; + desc->pg_layout_private = NULL; } EXPORT_SYMBOL_GPL(nfs_pageio_init); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 880805774f9f..92ce5783b707 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -69,6 +69,7 @@ struct nfs_pageio_descriptor { const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; struct nfs_direct_req *pg_dreq; + void *pg_layout_private; }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 00485e084394..ac7c8ae254f2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1248,6 +1248,7 @@ struct nfs_pgio_header { void (*release) (struct nfs_pgio_header *hdr); const struct nfs_pgio_completion_ops *completion_ops; struct nfs_direct_req *dreq; + void *layout_private; spinlock_t lock; /* fields protected by lock */ int pnfs_error; -- cgit v1.2.3 From 7de6e28417c65919cf2c1621841a650c4a3afbbd Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 2 Aug 2012 15:38:23 +0300 Subject: pnfs-obj: Better IO pattern in case of unaligned offset Depending on layout and ARCH, ORE has some limits on max IO sizes which is communicated on (what else) ore_layout->max_io_length, which is always stripe aligned. This was considered as the pg_test boundary for splitting and starting a new IO. But in the case of a long IO where the start offset is not aligned what would happen is that both end of IO[N] and start of IO[N+1] would be unaligned, causing each IO boundary parity unit to be calculated and written twice. So what we do in this patch is split the very start of an unaligned IO, up to a stripe boundary, and then next IO's can continue fully aligned til the end. We might be sacrificing the case where the full unaligned IO would fit within a single max_io_length, but the sacrifice is well worth the elimination of double calculation and parity units IO. Actually the sacrificing is marginal and is almost unmeasurable. TODO: If we know the total expected linear segment that will be received, at pg_init, we could use that information in many places: 1. blocks-layout get_layout write segment size 2. Better mds-threshold 3. In above situation for a better clean split I will do this in future submission. Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 55 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index f50d3e8d6f22..ea6d111b03e9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, return false; return pgio->pg_count + req->wb_bytes <= - OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + (unsigned long)pgio->pg_layout_private; +} + +void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + pnfs_generic_pg_init_read(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; +} + +static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, + unsigned long *stripe_end) +{ + u32 stripe_off; + unsigned stripe_size; + + if (layout->raid_algorithm == PNFS_OSD_RAID_0) + return true; + + stripe_size = layout->stripe_unit * + (layout->group_width - layout->parity); + + div_u64_rem(offset, stripe_size, &stripe_off); + if (!stripe_off) + return true; + + *stripe_end = stripe_size - stripe_off; + return false; +} + +void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + unsigned long stripe_end = 0; + + pnfs_generic_pg_init_write(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + if (req->wb_offset || + !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, + &OBJIO_LSEG(pgio->pg_lseg)->layout, + &stripe_end)) { + pgio->pg_layout_private = (void *)stripe_end; + } else { + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + } } static const struct nfs_pageio_ops objio_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = objio_init_read, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = objio_init_write, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; -- cgit v1.2.3 From 81abe27b10af98f861c955be63da700938dd59c1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Aug 2012 09:38:08 -0700 Subject: userns: Fix link restrictions to use uid_eq Signed-off-by: "Eric W. Biederman" --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 1b464390dde8..05480a64d7b7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -678,7 +678,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) /* Allowed if owner and follower match. */ inode = link->dentry->d_inode; - if (current_cred()->fsuid == inode->i_uid) + if (uid_eq(current_cred()->fsuid, inode->i_uid)) return 0; /* Allowed if parent directory not sticky and world-writable. */ @@ -687,7 +687,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) return 0; /* Allowed if parent directory and link owner match. */ - if (parent->i_uid == inode->i_uid) + if (uid_eq(parent->i_uid, inode->i_uid)) return 0; path_put_conditional(link, nd); @@ -757,7 +757,7 @@ static int may_linkat(struct path *link) /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ - if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || + if (uid_eq(cred->fsuid, inode->i_uid) || safe_hardlink_source(inode) || capable(CAP_FOWNER)) return 0; -- cgit v1.2.3 From a384f6411734e763daa4bae30e8ff170d7d4c3e2 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 19 Jul 2012 15:47:11 -0700 Subject: pstore/ram: Fix possible NULL dereference We can dereference 'cxt->cprz' if console and dump logging are disabled (which is unlikely, but still possible to do). This patch fixes the issue by changing the code so that we don't dereference przs at all, we can just calculate bufsize from console_size and record_size values. Plus, while at it, the patch improves the buffer size calculation. After Kay's printk rework, we know the optimal buffer size for console logging -- it is LOG_LINE_MAX (defined privately in printk.c). Previously, if only console logging was enabled, we would allocate unnecessary large buffer in pstore, while we only need LOG_LINE_MAX. (Pstore console logging is still capable of handling buffers > LOG_LINE_MAX, it will just do multiple calls to psinfo->write). Note that I don't export the constant, since we will do even a better thing soon: we will switch console logging to a new write_buf API, which will eliminate the need for the additional buffer; and so we won't need the constant. Reported-by: Dan Carpenter Signed-off-by: Anton Vorontsov Acked-by: Kees Cook --- fs/pstore/ram.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 0b311bc18916..bcd1bbd42598 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -414,13 +414,14 @@ static int __devinit ramoops_probe(struct platform_device *pdev) cxt->pstore.data = cxt; /* - * Console can handle any buffer size, so prefer dumps buffer - * size since usually it is smaller. + * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we + * have to handle dumps, we must have at least record_size buffer. And + * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be + * ZERO_SIZE_PTR). */ - if (cxt->przs) - cxt->pstore.bufsize = cxt->przs[0]->buffer_size; - else - cxt->pstore.bufsize = cxt->cprz->buffer_size; + if (cxt->console_size) + cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */ + cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize); cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL); spin_lock_init(&cxt->pstore.buf_lock); if (!cxt->pstore.buf) { -- cgit v1.2.3 From 0427193b691edc81c846c7d0ebd2561cae8709d8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Aug 2012 17:02:48 -0700 Subject: pstore/ram: Fix printk format warning Fix printk format warning (on i386) in pstore: fs/pstore/ram.c:409:3: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'size_t' Signed-off-by: Randy Dunlap Acked-by: Kees Cook Signed-off-by: Anton Vorontsov --- fs/pstore/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index bcd1bbd42598..fba8c7256929 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -406,7 +406,7 @@ static int __devinit ramoops_probe(struct platform_device *pdev) goto fail_init_fprz; if (!cxt->przs && !cxt->cprz && !cxt->fprz) { - pr_err("memory size too small, minimum is %lu\n", + pr_err("memory size too small, minimum is %zu\n", cxt->console_size + cxt->record_size + cxt->ftrace_size); goto fail_cnt; -- cgit v1.2.3 From 242030365eacb649161023a3a024373198c34d59 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jul 2012 19:49:37 -0700 Subject: pstore/ram: Mark ramoops_pstore_write_buf() as notrace write_buf() should be marked as notrace, otherwise it is prone to recursion. Though, yet the issue is never triggered in real life, because we run inside the function tracer, where ftrace does its own recurse protection. But it's still no good, plus soon we might switch to our own tracer ops, and then the issue will be fatal. So, let's fix it. Signed-off-by: Anton Vorontsov --- fs/pstore/ram.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index fba8c7256929..91016049e551 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #define RAMOOPS_KERNMSG_HDR "====" @@ -181,12 +182,11 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) return len; } - -static int ramoops_pstore_write_buf(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, - const char *buf, size_t size, - struct pstore_info *psi) +static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, + enum kmsg_dump_reason reason, + u64 *id, unsigned int part, + const char *buf, size_t size, + struct pstore_info *psi) { struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt]; -- cgit v1.2.3 From d796c52ef0b71a988364f6109aeb63d79c5b116b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 5 Aug 2012 19:04:57 -0400 Subject: ext4: make sure the journal sb is written in ext4_clear_journal_err() After we transfer set the EXT4_ERROR_FS bit in the file system superblock, it's not enough to call jbd2_journal_clear_err() to clear the error indication from journal superblock --- we need to call jbd2_journal_update_sb_errno() as well. Otherwise, when the root file system is mounted read-only, the journal is replayed, and the error indicator is transferred to the superblock --- but the s_errno field in the jbd2 superblock is left set (since although we cleared it in memory, we never flushed it out to disk). This can end up confusing e2fsck. We should make e2fsck more robust in this case, but the kernel shouldn't be leaving things in this confused state, either. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/super.c | 1 + fs/jbd2/journal.c | 3 ++- include/linux/jbd2.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d76ec8277d3f..ccc4bcad5616 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4430,6 +4430,7 @@ static void ext4_clear_journal_err(struct super_block *sb, ext4_commit_super(sb, 1); jbd2_journal_clear_err(journal); + jbd2_journal_update_sb_errno(journal); } } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e9a3c4c85594..bd23f2ebaa67 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1377,7 +1377,7 @@ static void jbd2_mark_journal_empty(journal_t *journal) * Update a journal's errno. Write updated superblock to disk waiting for IO * to complete. */ -static void jbd2_journal_update_sb_errno(journal_t *journal) +void jbd2_journal_update_sb_errno(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; @@ -1390,6 +1390,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal) jbd2_write_superblock(journal, WRITE_SYNC); } +EXPORT_SYMBOL(jbd2_journal_update_sb_errno); /* * Read the superblock for a given journal, performing initial diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f334c7fab967..3efc43f3f162 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1125,6 +1125,7 @@ extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); +extern void jbd2_journal_update_sb_errno(journal_t *); extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, unsigned long, int); extern void __jbd2_journal_abort_hard (journal_t *); -- cgit v1.2.3 From 7e731bc9a12339f344cddf82166b82633d99dd86 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 5 Aug 2012 23:28:16 -0400 Subject: ext4: avoid kmemcheck complaint from reading uninitialized memory Commit 03179fe923 introduced a kmemcheck complaint in ext4_da_get_block_prep() because we save and restore ei->i_da_metadata_calc_last_lblock even though it is left uninitialized in the case where i_da_metadata_calc_len is zero. This doesn't hurt anything, but silencing the kmemcheck complaint makes it easier for people to find real bugs. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=45631 (which is marked as a regression). Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ccc4bcad5616..56bcaec9149c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -959,6 +959,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; + ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; -- cgit v1.2.3 From 36b71a8bfbc92e1ba164e9aec840c0180ee933b5 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 26 Jul 2012 12:44:30 -0500 Subject: dlm: fix deadlock between dlm_send and dlm_controld A deadlock sometimes occurs between dlm_controld closing a lowcomms connection through configfs and dlm_send looking up the address for a new connection in configfs. dlm_controld does a configfs rmdir which calls dlm_lowcomms_close which waits for dlm_send to cancel work on the workqueues. The dlm_send workqueue thread has called tcp_connect_to_sock which calls dlm_nodeid_to_addr which does a configfs lookup and blocks on a lock held by dlm_controld in the rmdir path. The solution here is to save the node addresses within the lowcomms code so that the lowcomms workqueue does not need to step through configfs to get a node address. dlm_controld: wait_for_completion+0x1d/0x20 __cancel_work_timer+0x1b3/0x1e0 cancel_work_sync+0x10/0x20 dlm_lowcomms_close+0x4c/0xb0 [dlm] drop_comm+0x22/0x60 [dlm] client_drop_item+0x26/0x50 [configfs] configfs_rmdir+0x180/0x230 [configfs] vfs_rmdir+0xbd/0xf0 do_rmdir+0x103/0x120 sys_rmdir+0x16/0x20 dlm_send: mutex_lock+0x2b/0x50 get_comm+0x34/0x140 [dlm] dlm_nodeid_to_addr+0x18/0xd0 [dlm] tcp_connect_to_sock+0xf4/0x2d0 [dlm] process_send_sockets+0x1d2/0x260 [dlm] worker_thread+0x170/0x2a0 Signed-off-by: David Teigland --- fs/dlm/config.c | 79 ++++----------------- fs/dlm/config.h | 2 - fs/dlm/lowcomms.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++------- fs/dlm/lowcomms.h | 2 + fs/dlm/main.c | 2 + 5 files changed, 200 insertions(+), 90 deletions(-) (limited to 'fs') diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 9ccf7346834a..a0387dd8b1f0 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -750,6 +750,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) { struct sockaddr_storage *addr; + int rv; if (len != sizeof(struct sockaddr_storage)) return -EINVAL; @@ -762,6 +763,13 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) return -ENOMEM; memcpy(addr, buf, len); + + rv = dlm_lowcomms_addr(cm->nodeid, addr, len); + if (rv) { + kfree(addr); + return rv; + } + cm->addr[cm->addr_count++] = addr; return len; } @@ -878,34 +886,7 @@ static void put_space(struct dlm_space *sp) config_item_put(&sp->group.cg_item); } -static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) -{ - switch (x->ss_family) { - case AF_INET: { - struct sockaddr_in *sinx = (struct sockaddr_in *)x; - struct sockaddr_in *siny = (struct sockaddr_in *)y; - if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) - return 0; - if (sinx->sin_port != siny->sin_port) - return 0; - break; - } - case AF_INET6: { - struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; - struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; - if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) - return 0; - if (sinx->sin6_port != siny->sin6_port) - return 0; - break; - } - default: - return 0; - } - return 1; -} - -static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) +static struct dlm_comm *get_comm(int nodeid) { struct config_item *i; struct dlm_comm *cm = NULL; @@ -919,19 +900,11 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) list_for_each_entry(i, &comm_list->cg_children, ci_entry) { cm = config_item_to_comm(i); - if (nodeid) { - if (cm->nodeid != nodeid) - continue; - found = 1; - config_item_get(i); - break; - } else { - if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) - continue; - found = 1; - config_item_get(i); - break; - } + if (cm->nodeid != nodeid) + continue; + found = 1; + config_item_get(i); + break; } mutex_unlock(&clusters_root.subsys.su_mutex); @@ -995,7 +968,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, int dlm_comm_seq(int nodeid, uint32_t *seq) { - struct dlm_comm *cm = get_comm(nodeid, NULL); + struct dlm_comm *cm = get_comm(nodeid); if (!cm) return -EEXIST; *seq = cm->seq; @@ -1003,28 +976,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq) return 0; } -int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) -{ - struct dlm_comm *cm = get_comm(nodeid, NULL); - if (!cm) - return -EEXIST; - if (!cm->addr_count) - return -ENOENT; - memcpy(addr, cm->addr[0], sizeof(*addr)); - put_comm(cm); - return 0; -} - -int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) -{ - struct dlm_comm *cm = get_comm(0, addr); - if (!cm) - return -EEXIST; - *nodeid = cm->nodeid; - put_comm(cm); - return 0; -} - int dlm_our_nodeid(void) { return local_comm ? local_comm->nodeid : 0; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index dbd35a08f3a5..f30697bc2780 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -46,8 +46,6 @@ void dlm_config_exit(void); int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, int *count_out); int dlm_comm_seq(int nodeid, uint32_t *seq); -int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); -int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); int dlm_our_nodeid(void); int dlm_our_addr(struct sockaddr_storage *addr, int num); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 5c1b0e38c7a4..522a69fccd84 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -140,6 +140,16 @@ struct writequeue_entry { struct connection *con; }; +struct dlm_node_addr { + struct list_head list; + int nodeid; + int addr_count; + struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; +}; + +static LIST_HEAD(dlm_node_addrs); +static DEFINE_SPINLOCK(dlm_node_addrs_spin); + static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; static int dlm_local_count; static int dlm_allow_conn; @@ -264,31 +274,146 @@ static struct connection *assoc2con(int assoc_id) return NULL; } -static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) +static struct dlm_node_addr *find_node_addr(int nodeid) +{ + struct dlm_node_addr *na; + + list_for_each_entry(na, &dlm_node_addrs, list) { + if (na->nodeid == nodeid) + return na; + } + return NULL; +} + +static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) { - struct sockaddr_storage addr; - int error; + switch (x->ss_family) { + case AF_INET: { + struct sockaddr_in *sinx = (struct sockaddr_in *)x; + struct sockaddr_in *siny = (struct sockaddr_in *)y; + if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) + return 0; + if (sinx->sin_port != siny->sin_port) + return 0; + break; + } + case AF_INET6: { + struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; + struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; + if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) + return 0; + if (sinx->sin6_port != siny->sin6_port) + return 0; + break; + } + default: + return 0; + } + return 1; +} + +static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, + struct sockaddr *sa_out) +{ + struct sockaddr_storage sas; + struct dlm_node_addr *na; if (!dlm_local_count) return -1; - error = dlm_nodeid_to_addr(nodeid, &addr); - if (error) - return error; + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (na && na->addr_count) + memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); + spin_unlock(&dlm_node_addrs_spin); + + if (!na) + return -EEXIST; + + if (!na->addr_count) + return -ENOENT; + + if (sas_out) + memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); + + if (!sa_out) + return 0; if (dlm_local_addr[0]->ss_family == AF_INET) { - struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; - struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; + struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; + struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; ret4->sin_addr.s_addr = in4->sin_addr.s_addr; } else { - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; - struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; + struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; ret6->sin6_addr = in6->sin6_addr; } return 0; } +static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) +{ + struct dlm_node_addr *na; + int rv = -EEXIST; + + spin_lock(&dlm_node_addrs_spin); + list_for_each_entry(na, &dlm_node_addrs, list) { + if (!na->addr_count) + continue; + + if (!addr_compare(na->addr[0], addr)) + continue; + + *nodeid = na->nodeid; + rv = 0; + break; + } + spin_unlock(&dlm_node_addrs_spin); + return rv; +} + +int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) +{ + struct sockaddr_storage *new_addr; + struct dlm_node_addr *new_node, *na; + + new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); + if (!new_node) + return -ENOMEM; + + new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); + if (!new_addr) { + kfree(new_node); + return -ENOMEM; + } + + memcpy(new_addr, addr, len); + + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (!na) { + new_node->nodeid = nodeid; + new_node->addr[0] = new_addr; + new_node->addr_count = 1; + list_add(&new_node->list, &dlm_node_addrs); + spin_unlock(&dlm_node_addrs_spin); + return 0; + } + + if (na->addr_count >= DLM_MAX_ADDR_COUNT) { + spin_unlock(&dlm_node_addrs_spin); + kfree(new_addr); + kfree(new_node); + return -ENOSPC; + } + + na->addr[na->addr_count++] = new_addr; + spin_unlock(&dlm_node_addrs_spin); + kfree(new_node); + return 0; +} + /* Data available on socket or listen socket received a connect */ static void lowcomms_data_ready(struct sock *sk, int count_unused) { @@ -510,7 +635,7 @@ static void process_sctp_notification(struct connection *con, return; } make_sockaddr(&prim.ssp_addr, 0, &addr_len); - if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { + if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { unsigned char *b=(unsigned char *)&prim.ssp_addr; log_print("reject connect from unknown addr"); print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, @@ -747,7 +872,7 @@ static int tcp_accept_from_sock(struct connection *con) /* Get the new node's NODEID */ make_sockaddr(&peeraddr, 0, &len); - if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { + if (addr_to_nodeid(&peeraddr, &nodeid)) { unsigned char *b=(unsigned char *)&peeraddr; log_print("connect from non cluster node"); print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, @@ -862,7 +987,7 @@ static void sctp_init_assoc(struct connection *con) if (con->retries++ > MAX_CONNECT_RETRIES) return; - if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { + if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { log_print("no address for nodeid %d", con->nodeid); return; } @@ -928,11 +1053,11 @@ static void sctp_init_assoc(struct connection *con) /* Connect a new socket to its peer */ static void tcp_connect_to_sock(struct connection *con) { - int result = -EHOSTUNREACH; struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock = NULL; int one = 1; + int result; if (con->nodeid == 0) { log_print("attempt to connect sock 0 foiled"); @@ -944,10 +1069,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out; /* Some odd races can cause double-connects, ignore them */ - if (con->sock) { - result = 0; + if (con->sock) goto out; - } /* Create a socket to communicate with */ result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, @@ -956,8 +1079,11 @@ static void tcp_connect_to_sock(struct connection *con) goto out_err; memset(&saddr, 0, sizeof(saddr)); - if (dlm_nodeid_to_addr(con->nodeid, &saddr)) + result = nodeid_to_addr(con->nodeid, &saddr, NULL); + if (result < 0) { + log_print("no address for nodeid %d", con->nodeid); goto out_err; + } sock->sk->sk_user_data = con; con->rx_action = receive_from_sock; @@ -983,8 +1109,7 @@ static void tcp_connect_to_sock(struct connection *con) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, sizeof(one)); - result = - sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, + result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, O_NONBLOCK); if (result == -EINPROGRESS) result = 0; @@ -1002,11 +1127,17 @@ out_err: * Some errors are fatal and this list might need adjusting. For other * errors we try again until the max number of retries is reached. */ - if (result != -EHOSTUNREACH && result != -ENETUNREACH && - result != -ENETDOWN && result != -EINVAL - && result != -EPROTONOSUPPORT) { + if (result != -EHOSTUNREACH && + result != -ENETUNREACH && + result != -ENETDOWN && + result != -EINVAL && + result != -EPROTONOSUPPORT) { + log_print("connect %d try %d error %d", con->nodeid, + con->retries, result); + mutex_unlock(&con->sock_mutex); + msleep(1000); lowcomms_connect_sock(con); - result = 0; + return; } out: mutex_unlock(&con->sock_mutex); @@ -1414,6 +1545,7 @@ static void clean_one_writequeue(struct connection *con) int dlm_lowcomms_close(int nodeid) { struct connection *con; + struct dlm_node_addr *na; log_print("closing connection to node %d", nodeid); con = nodeid2con(nodeid, 0); @@ -1428,6 +1560,17 @@ int dlm_lowcomms_close(int nodeid) clean_one_writequeue(con); close_connection(con, true); } + + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (na) { + list_del(&na->list); + while (na->addr_count--) + kfree(na->addr[na->addr_count]); + kfree(na); + } + spin_unlock(&dlm_node_addrs_spin); + return 0; } @@ -1577,3 +1720,17 @@ fail_destroy: fail: return error; } + +void dlm_lowcomms_exit(void) +{ + struct dlm_node_addr *na, *safe; + + spin_lock(&dlm_node_addrs_spin); + list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) { + list_del(&na->list); + while (na->addr_count--) + kfree(na->addr[na->addr_count]); + kfree(na); + } + spin_unlock(&dlm_node_addrs_spin); +} diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 1311e6426287..67462e54fc2f 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -16,10 +16,12 @@ int dlm_lowcomms_start(void); void dlm_lowcomms_stop(void); +void dlm_lowcomms_exit(void); int dlm_lowcomms_close(int nodeid); void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); void dlm_lowcomms_commit_buffer(void *mh); int dlm_lowcomms_connect_node(int nodeid); +int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); #endif /* __LOWCOMMS_DOT_H__ */ diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 5a59efa0bb46..079c0bd71ab7 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -17,6 +17,7 @@ #include "user.h" #include "memory.h" #include "config.h" +#include "lowcomms.h" static int __init init_dlm(void) { @@ -78,6 +79,7 @@ static void __exit exit_dlm(void) dlm_config_exit(); dlm_memory_exit(); dlm_lockspace_exit(); + dlm_lowcomms_exit(); dlm_unregister_debugfs(); } -- cgit v1.2.3 From 6ad2291624824c1de19dbbbbb6d4f9f601b60781 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 2 Aug 2012 11:00:05 -0500 Subject: dlm: fix uninitialized spinlock Use DEFINE_SPINLOCK for global dlm_cb_seq_spin. Signed-off-by: David Teigland --- fs/dlm/ast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 63dc19c54d5a..27a6ba9aaeec 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -15,8 +15,8 @@ #include "lock.h" #include "user.h" -static uint64_t dlm_cb_seq; -static spinlock_t dlm_cb_seq_spin; +static uint64_t dlm_cb_seq; +static DEFINE_SPINLOCK(dlm_cb_seq_spin); static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) { -- cgit v1.2.3 From 475f230c6072fb2186f48b23943afcd0ee3a8343 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 2 Aug 2012 11:08:21 -0500 Subject: dlm: fix unlock balance warnings The in_recovery rw_semaphore has always been acquired and released by different threads by design. To work around the "BUG: bad unlock balance detected!" messages, adjust things so the dlm_recoverd thread always does both down_write and up_write. Signed-off-by: David Teigland --- fs/dlm/dlm_internal.h | 46 ++++++++++++++++++++++++++++++++++++---------- fs/dlm/lockspace.c | 15 ++++++++++++--- fs/dlm/member.c | 17 +++++++++++------ fs/dlm/rcom.c | 2 +- fs/dlm/recoverd.c | 27 ++++++++++++++++++--------- fs/dlm/recoverd.h | 1 - 6 files changed, 78 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 9d3e485f88c8..871c1abf6029 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -604,6 +604,7 @@ struct dlm_ls { struct idr ls_recover_idr; spinlock_t ls_recover_idr_lock; wait_queue_head_t ls_wait_general; + wait_queue_head_t ls_recover_lock_wait; struct mutex ls_clear_proc_locks; struct list_head ls_root_list; /* root resources */ @@ -616,15 +617,40 @@ struct dlm_ls { char ls_name[1]; }; -#define LSFL_WORK 0 -#define LSFL_RUNNING 1 -#define LSFL_RECOVERY_STOP 2 -#define LSFL_RCOM_READY 3 -#define LSFL_RCOM_WAIT 4 -#define LSFL_UEVENT_WAIT 5 -#define LSFL_TIMEWARN 6 -#define LSFL_CB_DELAY 7 -#define LSFL_NODIR 8 +/* + * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines + * that they should abort what they're doing so new recovery can be started. + * + * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it + * should do down_write() on the in_recovery rw_semaphore. (doing down_write + * within dlm_ls_stop causes complaints about the lock acquired/released + * in different contexts.) + * + * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore. + * It sets this after it is done with down_write() on the in_recovery + * rw_semaphore and clears it after it has released the rw_semaphore. + * + * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it + * should begin recovery of the lockspace. + * + * LSFL_RUNNING - set when normal locking activity is enabled. + * dlm_ls_stop() clears this to tell dlm locking routines that they should + * quit what they are doing so recovery can run. dlm_recoverd sets + * this after recovery is finished. + */ + +#define LSFL_RECOVER_STOP 0 +#define LSFL_RECOVER_DOWN 1 +#define LSFL_RECOVER_LOCK 2 +#define LSFL_RECOVER_WORK 3 +#define LSFL_RUNNING 4 + +#define LSFL_RCOM_READY 5 +#define LSFL_RCOM_WAIT 6 +#define LSFL_UEVENT_WAIT 7 +#define LSFL_TIMEWARN 8 +#define LSFL_CB_DELAY 9 +#define LSFL_NODIR 10 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ @@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls) static inline int dlm_recovery_stopped(struct dlm_ls *ls) { - return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); } static inline int dlm_no_directory(struct dlm_ls *ls) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 952557d00ccd..2e99fb0c9737 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster, INIT_LIST_HEAD(&ls->ls_root_list); init_rwsem(&ls->ls_root_sem); - down_write(&ls->ls_in_recovery); - spin_lock(&lslist_lock); ls->ls_create_count = 1; list_add(&ls->ls_list, &lslist); @@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster, } } - /* needs to find ls in lslist */ + init_waitqueue_head(&ls->ls_recover_lock_wait); + + /* + * Once started, dlm_recoverd first looks for ls in lslist, then + * initializes ls_in_recovery as locked in "down" mode. We need + * to wait for the wakeup from dlm_recoverd because in_recovery + * has to start out in down mode. + */ + error = dlm_recoverd_start(ls); if (error) { log_error(ls, "can't start dlm_recoverd %d", error); goto out_callback; } + wait_event(ls->ls_recover_lock_wait, + test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); + ls->ls_kobj.kset = dlm_kset; error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, "%s", ls->ls_name); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 862640a36d5c..476557b54921 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls) down_write(&ls->ls_recv_active); /* - * Abort any recovery that's in progress (see RECOVERY_STOP, + * Abort any recovery that's in progress (see RECOVER_STOP, * dlm_recovery_stopped()) and tell any other threads running in the * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). */ spin_lock(&ls->ls_recover_lock); - set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + set_bit(LSFL_RECOVER_STOP, &ls->ls_flags); new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); ls->ls_recover_seq++; spin_unlock(&ls->ls_recover_lock); @@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls) * when recovery is complete. */ - if (new) - down_write(&ls->ls_in_recovery); + if (new) { + set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags); + wake_up_process(ls->ls_recoverd_task); + wait_event(ls->ls_recover_lock_wait, + test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); + } /* * The recoverd suspend/resume makes sure that dlm_recoverd (if - * running) has noticed RECOVERY_STOP above and quit processing the + * running) has noticed RECOVER_STOP above and quit processing the * previous recovery. */ @@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls) kfree(rv_old); } - dlm_recoverd_kick(ls); + set_bit(LSFL_RECOVER_WORK, &ls->ls_flags); + wake_up_process(ls->ls_recoverd_task); return 0; fail: diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 87f1a56eab32..9d61947d473a 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) spin_lock(&ls->ls_recover_lock); status = ls->ls_recover_status; - stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); seq = ls->ls_recover_seq; spin_unlock(&ls->ls_recover_lock); diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 88ce65ff021e..32f9f8926ec3 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq) set_bit(LSFL_RUNNING, &ls->ls_flags); /* unblocks processes waiting to enter the dlm */ up_write(&ls->ls_in_recovery); + clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); error = 0; } spin_unlock(&ls->ls_recover_lock); @@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls) rv = ls->ls_recover_args; ls->ls_recover_args = NULL; if (rv && ls->ls_recover_seq == rv->seq) - clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags); spin_unlock(&ls->ls_recover_lock); if (rv) { @@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg) return -1; } + down_write(&ls->ls_in_recovery); + set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); + wake_up(&ls->ls_recover_lock_wait); + while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(LSFL_WORK, &ls->ls_flags)) + if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) && + !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) schedule(); set_current_state(TASK_RUNNING); - if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) + if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) { + down_write(&ls->ls_in_recovery); + set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); + wake_up(&ls->ls_recover_lock_wait); + } + + if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags)) do_ls_recovery(ls); } + if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)) + up_write(&ls->ls_in_recovery); + dlm_put_lockspace(ls); return 0; } -void dlm_recoverd_kick(struct dlm_ls *ls) -{ - set_bit(LSFL_WORK, &ls->ls_flags); - wake_up_process(ls->ls_recoverd_task); -} - int dlm_recoverd_start(struct dlm_ls *ls) { struct task_struct *p; diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h index 866657c5d69d..8856079733fa 100644 --- a/fs/dlm/recoverd.h +++ b/fs/dlm/recoverd.h @@ -14,7 +14,6 @@ #ifndef __RECOVERD_DOT_H__ #define __RECOVERD_DOT_H__ -void dlm_recoverd_kick(struct dlm_ls *ls); void dlm_recoverd_stop(struct dlm_ls *ls); int dlm_recoverd_start(struct dlm_ls *ls); void dlm_recoverd_suspend(struct dlm_ls *ls); -- cgit v1.2.3 From 47fbf7976e0b7d9dcdd799e2a1baba19064d9631 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Aug 2012 16:03:13 -0400 Subject: NFSv4.1: Remove a bogus BUG_ON() in nfs4_layoutreturn_done Ever since commit 0a57cdac3f (NFSv4.1 send layoutreturn to fence disconnected data server) we've been sending layoutreturn calls while there is potentially still outstanding I/O to the data servers. The reason we do this is to avoid races between replayed writes to the MDS and the original writes to the DS. When this happens, the BUG_ON() in nfs4_layoutreturn_done can be triggered because it assumes that we would never call layoutreturn without knowing that all I/O to the DS is finished. The fix is to remove the BUG_ON() now that the assumptions behind the test are obsolete. Reported-by: Boaz Harrosh Reported-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>=3.5] --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f94f6b3928fc..c77d296bdaa6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6359,12 +6359,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; } spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - BUG_ON(!list_empty(&lo->plh_segs)); - } + if (task->tk_status == 0 && lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); -- cgit v1.2.3 From 389d7b26d9e4f78b17366c23a3aa16b3c5cb3bde Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Thu, 9 Aug 2012 15:19:25 +0200 Subject: bio: Fix potential memory leak in bio_find_or_create_slab() Do not leak memory by updating pointer with potentially NULL realloc return value. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Jeff Moyer Signed-off-by: Jens Axboe --- fs/bio.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 73922abba832..fed1f799cb56 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) { unsigned int sz = sizeof(struct bio) + extra_size; struct kmem_cache *slab = NULL; - struct bio_slab *bslab; + struct bio_slab *bslab, *new_bio_slabs; unsigned int i, entry = -1; mutex_lock(&bio_slab_lock); @@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) if (bio_slab_nr == bio_slab_max && entry == -1) { bio_slab_max <<= 1; - bio_slabs = krealloc(bio_slabs, - bio_slab_max * sizeof(struct bio_slab), - GFP_KERNEL); - if (!bio_slabs) + new_bio_slabs = krealloc(bio_slabs, + bio_slab_max * sizeof(struct bio_slab), + GFP_KERNEL); + if (!new_bio_slabs) goto out_unlock; + bio_slabs = new_bio_slabs; } if (entry == -1) entry = bio_slab_nr++; -- cgit v1.2.3 From 647d1e4c5235763b83fbfe74a09d148edc6ca152 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 9 Aug 2012 15:23:09 +0200 Subject: block: move down direct IO plugging Move unplugging for direct I/O from around ->direct_IO() down to do_blockdev_direct_IO(). This implicitly adds plugging for direct writes. CC: Li Shaohua Acked-by: Jeff Moyer Signed-off-by: Wu Fengguang Signed-off-by: Jens Axboe --- fs/direct-io.c | 5 +++++ mm/filemap.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 1faf4cb56f39..f86c720dba0e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, unsigned long user_addr; size_t bytes; struct buffer_head map_bh = { 0, }; + struct blk_plug plug; if (rw & WRITE) rw = WRITE_ODIRECT; @@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, PAGE_SIZE - user_addr / PAGE_SIZE); } + blk_start_plug(&plug); + for (seg = 0; seg < nr_segs; seg++) { user_addr = (unsigned long)iov[seg].iov_base; sdio.size += bytes = iov[seg].iov_len; @@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (sdio.bio) dio_bio_submit(dio, &sdio); + blk_finish_plug(&plug); + /* * It is possible that, we return short IO due to end of file. * In that case, we need to release all the pages we got hold on. diff --git a/mm/filemap.c b/mm/filemap.c index 2b0952974cb9..384344575c37 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1412,12 +1412,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, retval = filemap_write_and_wait_range(mapping, pos, pos + iov_length(iov, nr_segs) - 1); if (!retval) { - struct blk_plug plug; - - blk_start_plug(&plug); retval = mapping->a_ops->direct_IO(READ, iocb, iov, pos, nr_segs); - blk_finish_plug(&plug); } if (retval > 0) { *ppos = pos + retval; -- cgit v1.2.3 From b4c798cf695dc7cee9798a686128461ad0070115 Mon Sep 17 00:00:00 2001 From: Xue Ying Date: Fri, 10 Aug 2012 10:58:37 +0800 Subject: dlm: remove redundant variable assignments Once the tcp_create_listen_sock() is returned successfully, we will invoke add_sock() immediately. In add_sock(), the 'con' variable is assigned to 'sk_user_data', meanwhile, the 'sock' is also set to 'con->sock'. So it's unnecessary to do the same thing in tcp_create_listen_sock(). Signed-off-by: Xue Ying Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 522a69fccd84..3e6aaccce951 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1175,10 +1175,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, if (result < 0) { log_print("Failed to set SO_REUSEADDR on socket: %d", result); } - sock->sk->sk_user_data = con; con->rx_action = tcp_accept_from_sock; con->connect_action = tcp_connect_to_sock; - con->sock = sock; /* Bind to our port */ make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); -- cgit v1.2.3 From 4dd40f0cd99a3500c6df80eb8f537678559c761e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 10 Aug 2012 14:58:42 +0800 Subject: dlm: convert add_sock routine return value type to void Since add_sock() always returns a success code - 0, its return value type should be changed from integer to void. Signed-off-by: Ying Xue Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 3e6aaccce951..3637f3f18824 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -473,7 +473,7 @@ int dlm_lowcomms_connect_node(int nodeid) } /* Make a socket active */ -static int add_sock(struct socket *sock, struct connection *con) +static void add_sock(struct socket *sock, struct connection *con) { con->sock = sock; @@ -483,7 +483,6 @@ static int add_sock(struct socket *sock, struct connection *con) con->sock->sk->sk_state_change = lowcomms_state_change; con->sock->sk->sk_user_data = con; con->sock->sk->sk_allocation = GFP_NOFS; - return 0; } /* Add the port number to an IPv6 or 4 sockaddr and return the address -- cgit v1.2.3 From b8017d2957fb0ebf0c2aa91d48f2465f6f799738 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 8 Aug 2012 21:02:37 +0400 Subject: exofs: check for allocation failure in uri_store() There is no memory allocation failure check in uri_store(). That can lead to NULL pointer dereference. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Boaz Harrosh --- fs/exofs/sys.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index 5a7b691e748b..1b4f2f95fc37 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c @@ -80,8 +80,13 @@ static ssize_t uri_show(struct exofs_dev *edp, char *buf) static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len) { + uint8_t *new_uri; + edp->urilen = strlen(buf) + 1; - edp->uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL); + new_uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL); + if (new_uri == NULL) + return -ENOMEM; + edp->uri = new_uri; strncpy(edp->uri, buf, edp->urilen); return edp->urilen; } -- cgit v1.2.3 From 9c5bef5849c9fde1a37ac005299f759440cbaf4c Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 13 Aug 2012 14:29:55 +0800 Subject: dlm: cleanup send_to_sock routine Remove unnecessary code form send_to_sock routine. Signed-off-by: Ying Xue Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 3637f3f18824..331ea4f94efd 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1486,8 +1486,7 @@ static void send_to_sock(struct connection *con) } cond_resched(); goto out; - } - if (ret <= 0) + } else if (ret < 0) goto send_error; } @@ -1504,7 +1503,6 @@ static void send_to_sock(struct connection *con) if (e->len == 0 && e->users == 0) { list_del(&e->list); free_entry(e); - continue; } } spin_unlock(&con->writequeue_lock); @@ -1522,7 +1520,6 @@ out_connect: mutex_unlock(&con->sock_mutex); if (!test_bit(CF_INIT_PENDING, &con->flags)) lowcomms_connect_sock(con); - return; } static void clean_one_writequeue(struct connection *con) -- cgit v1.2.3 From 41f63c5359d14ca995172b8f6eaffd93f60fec54 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:47 -0700 Subject: workqueue: use mod_delayed_work() instead of cancel + queue Convert delayed_work users doing cancel_delayed_work() followed by queue_delayed_work() to mod_delayed_work(). Most conversions are straight-forward. Ones worth mentioning are, * drivers/edac/edac_mc.c: edac_mc_workq_setup() converted to always use mod_delayed_work() and cancel loop in edac_mc_reset_delay_period() is dropped. * drivers/platform/x86/thinkpad_acpi.c: No need to remember whether watchdog is active or not. @fan_watchdog_active and related code dropped. * drivers/power/charger-manager.c: Seemingly a lot of delayed_work_pending() abuse going on here. [delayed_]work_pending() are unsynchronized and racy when used like this. I converted one instance in fullbatt_handler(). Please conver the rest so that it invokes workqueue APIs for the intended target state rather than trying to game work item pending state transitions. e.g. if timer should be modified - call mod_delayed_work(), canceled - call cancel_delayed_work[_sync](). * drivers/thermal/thermal_sys.c: thermal_zone_device_set_polling() simplified. Note that round_jiffies() calls in this function are meaningless. round_jiffies() work on absolute jiffies not delta delay used by delayed_work. v2: Tomi pointed out that __cancel_delayed_work() users can't be safely converted to mod_delayed_work(). They could be calling it from irq context and if that happens while delayed_work_timer_fn() is running, it could deadlock. __cancel_delayed_work() users are dropped. Signed-off-by: Tejun Heo Acked-by: Henrique de Moraes Holschuh Acked-by: Dmitry Torokhov Acked-by: Anton Vorontsov Acked-by: David Howells Cc: Tomi Valkeinen Cc: Jens Axboe Cc: Jiri Kosina Cc: Doug Thompson Cc: David Airlie Cc: Roland Dreier Cc: "John W. Linville" Cc: Zhang Rui Cc: Len Brown Cc: "J. Bruce Fields" Cc: Johannes Berg --- block/genhd.c | 6 ++---- drivers/edac/edac_mc.c | 17 +---------------- drivers/infiniband/core/addr.c | 4 +--- drivers/infiniband/hw/nes/nes_hw.c | 6 ++---- drivers/infiniband/hw/nes/nes_nic.c | 5 ++--- drivers/net/wireless/ipw2x00/ipw2100.c | 8 +++----- drivers/net/wireless/zd1211rw/zd_usb.c | 3 +-- drivers/platform/x86/thinkpad_acpi.c | 20 +++++--------------- drivers/power/charger-manager.c | 9 +++------ drivers/power/ds2760_battery.c | 9 +++------ drivers/power/jz4740-battery.c | 6 ++---- drivers/thermal/thermal_sys.c | 15 ++++++--------- fs/afs/callback.c | 4 +--- fs/afs/server.c | 10 ++-------- fs/afs/vlocation.c | 14 +++----------- fs/nfs/nfs4renewd.c | 3 +-- net/core/dst.c | 4 ++-- net/rfkill/input.c | 3 +-- 18 files changed, 41 insertions(+), 105 deletions(-) (limited to 'fs') diff --git a/block/genhd.c b/block/genhd.c index cac7366957c3..5d8b44a6442b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1534,10 +1534,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) spin_lock_irq(&ev->lock); ev->clearing |= mask; - if (!ev->block) { - cancel_delayed_work(&ev->dwork); - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); - } + if (!ev->block) + mod_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); spin_unlock_irq(&ev->lock); } diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 616d90bcb3a4..7c0df4af9ef7 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -538,7 +538,7 @@ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) return; INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); - queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); + mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); } /* @@ -578,21 +578,6 @@ void edac_mc_reset_delay_period(int value) mutex_lock(&mem_ctls_mutex); - /* scan the list and turn off all workq timers, doing so under lock - */ - list_for_each(item, &mc_devices) { - mci = list_entry(item, struct mem_ctl_info, link); - - if (mci->op_state == OP_RUNNING_POLL) - cancel_delayed_work(&mci->work); - } - - mutex_unlock(&mem_ctls_mutex); - - - /* re-walk the list, and reset the poll delay */ - mutex_lock(&mem_ctls_mutex); - list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 28058ae33d38..eaec8d7a3b73 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -152,13 +152,11 @@ static void set_timeout(unsigned long time) { unsigned long delay; - cancel_delayed_work(&work); - delay = time - jiffies; if ((long)delay <= 0) delay = 1; - queue_delayed_work(addr_wq, &work, delay); + mod_delayed_work(addr_wq, &work, delay); } static void queue_req(struct addr_req *req) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index d42c9f435b1b..9e0895b45eb8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2679,11 +2679,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) } } if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { - if (nesdev->link_recheck) - cancel_delayed_work(&nesdev->work); nesdev->link_recheck = 1; - schedule_delayed_work(&nesdev->work, - NES_LINK_RECHECK_DELAY); + mod_delayed_work(system_wq, &nesdev->work, + NES_LINK_RECHECK_DELAY); } } diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index f3a3ecf8d09e..e43f6e41a6bd 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -243,10 +243,9 @@ static int nes_netdev_open(struct net_device *netdev) spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { - if (nesdev->link_recheck) - cancel_delayed_work(&nesdev->work); nesdev->link_recheck = 1; - schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); + mod_delayed_work(system_wq, &nesdev->work, + NES_LINK_RECHECK_DELAY); } spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 95aa8e1683ec..8a3420257eeb 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -2180,8 +2180,7 @@ static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status) /* Make sure the RF Kill check timer is running */ priv->stop_rf_kill = 0; - cancel_delayed_work(&priv->rf_kill); - schedule_delayed_work(&priv->rf_kill, round_jiffies_relative(HZ)); + mod_delayed_work(system_wq, &priv->rf_kill, round_jiffies_relative(HZ)); } static void send_scan_event(void *data) @@ -4321,9 +4320,8 @@ static int ipw_radio_kill_sw(struct ipw2100_priv *priv, int disable_radio) "disabled by HW switch\n"); /* Make sure the RF_KILL check timer is running */ priv->stop_rf_kill = 0; - cancel_delayed_work(&priv->rf_kill); - schedule_delayed_work(&priv->rf_kill, - round_jiffies_relative(HZ)); + mod_delayed_work(system_wq, &priv->rf_kill, + round_jiffies_relative(HZ)); } else schedule_reset(priv); } diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index af83c43bcdb1..ef2b171e3514 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -1164,8 +1164,7 @@ void zd_usb_reset_rx_idle_timer(struct zd_usb *usb) { struct zd_usb_rx *rx = &usb->rx; - cancel_delayed_work(&rx->idle_work); - queue_delayed_work(zd_workqueue, &rx->idle_work, ZD_RX_IDLE_INTERVAL); + mod_delayed_work(zd_workqueue, &rx->idle_work, ZD_RX_IDLE_INTERVAL); } static inline void init_usb_interrupt(struct zd_usb *usb) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e7f73287636c..06d2502ffb37 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -7682,25 +7682,15 @@ static int fan_set_speed(int speed) static void fan_watchdog_reset(void) { - static int fan_watchdog_active; - if (fan_control_access_mode == TPACPI_FAN_WR_NONE) return; - if (fan_watchdog_active) - cancel_delayed_work(&fan_watchdog_task); - if (fan_watchdog_maxinterval > 0 && - tpacpi_lifecycle != TPACPI_LIFE_EXITING) { - fan_watchdog_active = 1; - if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task, - msecs_to_jiffies(fan_watchdog_maxinterval - * 1000))) { - pr_err("failed to queue the fan watchdog, " - "watchdog will not trigger\n"); - } - } else - fan_watchdog_active = 0; + tpacpi_lifecycle != TPACPI_LIFE_EXITING) + mod_delayed_work(tpacpi_wq, &fan_watchdog_task, + msecs_to_jiffies(fan_watchdog_maxinterval * 1000)); + else + cancel_delayed_work(&fan_watchdog_task); } static void fan_watchdog_fire(struct work_struct *ignored) diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c index 526e5c931294..7ff83cf43c8c 100644 --- a/drivers/power/charger-manager.c +++ b/drivers/power/charger-manager.c @@ -509,9 +509,8 @@ static void _setup_polling(struct work_struct *work) if (!delayed_work_pending(&cm_monitor_work) || (delayed_work_pending(&cm_monitor_work) && time_after(next_polling, _next_polling))) { - cancel_delayed_work_sync(&cm_monitor_work); next_polling = jiffies + polling_jiffy; - queue_delayed_work(cm_wq, &cm_monitor_work, polling_jiffy); + mod_delayed_work(cm_wq, &cm_monitor_work, polling_jiffy); } out: @@ -546,10 +545,8 @@ static void fullbatt_handler(struct charger_manager *cm) if (cm_suspended) device_set_wakeup_capable(cm->dev, true); - if (delayed_work_pending(&cm->fullbatt_vchk_work)) - cancel_delayed_work(&cm->fullbatt_vchk_work); - queue_delayed_work(cm_wq, &cm->fullbatt_vchk_work, - msecs_to_jiffies(desc->fullbatt_vchkdrop_ms)); + mod_delayed_work(cm_wq, &cm->fullbatt_vchk_work, + msecs_to_jiffies(desc->fullbatt_vchkdrop_ms)); cm->fullbatt_vchk_jiffies_at = jiffies + msecs_to_jiffies( desc->fullbatt_vchkdrop_ms); diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c index 076e211a40b7..704e652072be 100644 --- a/drivers/power/ds2760_battery.c +++ b/drivers/power/ds2760_battery.c @@ -355,8 +355,7 @@ static void ds2760_battery_external_power_changed(struct power_supply *psy) dev_dbg(di->dev, "%s\n", __func__); - cancel_delayed_work(&di->monitor_work); - queue_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ/10); + mod_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ/10); } @@ -401,8 +400,7 @@ static void ds2760_battery_set_charged(struct power_supply *psy) /* postpone the actual work by 20 secs. This is for debouncing GPIO * signals and to let the current value settle. See AN4188. */ - cancel_delayed_work(&di->set_charged_work); - queue_delayed_work(di->monitor_wqueue, &di->set_charged_work, HZ * 20); + mod_delayed_work(di->monitor_wqueue, &di->set_charged_work, HZ * 20); } static int ds2760_battery_get_property(struct power_supply *psy, @@ -616,8 +614,7 @@ static int ds2760_battery_resume(struct platform_device *pdev) di->charge_status = POWER_SUPPLY_STATUS_UNKNOWN; power_supply_changed(&di->bat); - cancel_delayed_work(&di->monitor_work); - queue_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ); + mod_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ); return 0; } diff --git a/drivers/power/jz4740-battery.c b/drivers/power/jz4740-battery.c index 8dbc7bfaab14..ffbed5e5b945 100644 --- a/drivers/power/jz4740-battery.c +++ b/drivers/power/jz4740-battery.c @@ -173,16 +173,14 @@ static void jz_battery_external_power_changed(struct power_supply *psy) { struct jz_battery *jz_battery = psy_to_jz_battery(psy); - cancel_delayed_work(&jz_battery->work); - schedule_delayed_work(&jz_battery->work, 0); + mod_delayed_work(system_wq, &jz_battery->work, 0); } static irqreturn_t jz_battery_charge_irq(int irq, void *data) { struct jz_battery *jz_battery = data; - cancel_delayed_work(&jz_battery->work); - schedule_delayed_work(&jz_battery->work, 0); + mod_delayed_work(system_wq, &jz_battery->work, 0); return IRQ_HANDLED; } diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 2ab31e4f02cc..67789b8345d2 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -694,17 +694,14 @@ thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, int delay) { - cancel_delayed_work(&(tz->poll_queue)); - - if (!delay) - return; - if (delay > 1000) - queue_delayed_work(system_freezable_wq, &(tz->poll_queue), - round_jiffies(msecs_to_jiffies(delay))); + mod_delayed_work(system_freezable_wq, &tz->poll_queue, + round_jiffies(msecs_to_jiffies(delay))); + else if (delay) + mod_delayed_work(system_freezable_wq, &tz->poll_queue, + msecs_to_jiffies(delay)); else - queue_delayed_work(system_freezable_wq, &(tz->poll_queue), - msecs_to_jiffies(delay)); + cancel_delayed_work(&tz->poll_queue); } static void thermal_zone_device_passive(struct thermal_zone_device *tz, diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 587ef5123cd8..7ef637d7f3a5 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -351,9 +351,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work) */ void afs_flush_callback_breaks(struct afs_server *server) { - cancel_delayed_work(&server->cb_break_work); - queue_delayed_work(afs_callback_update_worker, - &server->cb_break_work, 0); + mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0); } #if 0 diff --git a/fs/afs/server.c b/fs/afs/server.c index d59b7516e943..f342acf3547d 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -285,12 +285,7 @@ static void afs_reap_server(struct work_struct *work) expiry = server->time_of_death + afs_server_timeout; if (expiry > now) { delay = (expiry - now) * HZ; - if (!queue_delayed_work(afs_wq, &afs_server_reaper, - delay)) { - cancel_delayed_work(&afs_server_reaper); - queue_delayed_work(afs_wq, &afs_server_reaper, - delay); - } + mod_delayed_work(afs_wq, &afs_server_reaper, delay); break; } @@ -323,6 +318,5 @@ static void afs_reap_server(struct work_struct *work) void __exit afs_purge_servers(void) { afs_server_timeout = 0; - cancel_delayed_work(&afs_server_reaper); - queue_delayed_work(afs_wq, &afs_server_reaper, 0); + mod_delayed_work(afs_wq, &afs_server_reaper, 0); } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 431984d2e372..57bcb1596530 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -561,12 +561,7 @@ static void afs_vlocation_reaper(struct work_struct *work) if (expiry > now) { delay = (expiry - now) * HZ; _debug("delay %lu", delay); - if (!queue_delayed_work(afs_wq, &afs_vlocation_reap, - delay)) { - cancel_delayed_work(&afs_vlocation_reap); - queue_delayed_work(afs_wq, &afs_vlocation_reap, - delay); - } + mod_delayed_work(afs_wq, &afs_vlocation_reap, delay); break; } @@ -614,13 +609,10 @@ void afs_vlocation_purge(void) spin_lock(&afs_vlocation_updates_lock); list_del_init(&afs_vlocation_updates); spin_unlock(&afs_vlocation_updates_lock); - cancel_delayed_work(&afs_vlocation_update); - queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, 0); + mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0); destroy_workqueue(afs_vlocation_update_worker); - cancel_delayed_work(&afs_vlocation_reap); - queue_delayed_work(afs_wq, &afs_vlocation_reap, 0); + mod_delayed_work(afs_wq, &afs_vlocation_reap, 0); } /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6930bec91bca..1720d32ffa54 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) timeout = 5 * HZ; dprintk("%s: requeueing work. Lease period = %ld\n", __func__, (timeout + HZ - 1) / HZ); - cancel_delayed_work(&clp->cl_renewd); - schedule_delayed_work(&clp->cl_renewd, timeout); + mod_delayed_work(system_wq, &clp->cl_renewd, timeout); set_bit(NFS_CS_RENEWD, &clp->cl_res_state); spin_unlock(&clp->cl_lock); } diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..ed5a0b409aa3 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -214,8 +214,8 @@ void __dst_free(struct dst_entry *dst) if (dst_garbage.timer_inc > DST_GC_INC) { dst_garbage.timer_inc = DST_GC_INC; dst_garbage.timer_expires = DST_GC_MIN; - cancel_delayed_work(&dst_gc_work); - schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); + mod_delayed_work(system_wq, &dst_gc_work, + dst_garbage.timer_expires); } spin_unlock_bh(&dst_garbage.lock); } diff --git a/net/rfkill/input.c b/net/rfkill/input.c index 24c55c53e6a2..c9d931e7ffec 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -164,8 +164,7 @@ static void rfkill_schedule_global_op(enum rfkill_sched_op op) rfkill_op_pending = true; if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) { /* bypass the limiter for EPO */ - cancel_delayed_work(&rfkill_op_work); - schedule_delayed_work(&rfkill_op_work, 0); + mod_delayed_work(system_wq, &rfkill_op_work, 0); rfkill_last_scheduled = jiffies; } else rfkill_schedule_ratelimited(); -- cgit v1.2.3 From bb2b6d19ec8b593b66402e2895c4314955b19833 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 23 Jul 2012 16:39:29 +0000 Subject: udf: fix udf_setsize() for file data in ICB If the new size is larger than the old size and the old file data was stored in the ICB (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) and the new size still fits in the ICB, skip the call to udf_extend_file() as it does not handle this i_alloc_type value (it calls BUG()). Signed-off-by: Ian Abbott Signed-off-by: Jan Kara --- fs/udf/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/inode.c b/fs/udf/inode.c index fafaad795cd6..aa233469b3c1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1124,14 +1124,17 @@ int udf_setsize(struct inode *inode, loff_t newsize) if (err) return err; down_write(&iinfo->i_data_sem); - } else + } else { iinfo->i_lenAlloc = newsize; + goto set_size; + } } err = udf_extend_file(inode, newsize); if (err) { up_write(&iinfo->i_data_sem); return err; } +set_size: truncate_setsize(inode, newsize); up_write(&iinfo->i_data_sem); } else { -- cgit v1.2.3 From dc141a402b9dc03a4188cd978a4cf149c397172c Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Sat, 21 Jul 2012 16:35:17 +0530 Subject: UDF: During mount free lvid_bh before rescanning with different blocksize If s_lvid_bh is not freed and set to NULL before re-scanning partition with default block size, we might end up using wrong lvid in case s_lvid_bh is not updated in udf_load_logicalvolint during rescan. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: Jan Kara --- fs/udf/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index dcbf98722afc..9f55f7981b7d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2000,6 +2000,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!silent) pr_notice("Rescanning with blocksize %d\n", UDF_DEFAULT_BLOCKSIZE); + brelse(sbi->s_lvid_bh); + sbi->s_lvid_bh = NULL; uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; ret = udf_load_vrs(sb, &uopt, silent, &fileset); } -- cgit v1.2.3 From 6ea2eea1fa930b9308a06f77fce65c38931eeb13 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Wed, 18 Jul 2012 12:12:41 +0800 Subject: quota: Move down dqptr_sem read after initializing default warn[] type at __dquot_alloc_space(). sb->s_dqopt->dqptr_sem is used to serialize ops using pointers from inode to dquots. But for __dquot_alloc_space(), it could be safely moved down after the default warn[] array got initialized. Signed-off-by: Jie Liu Signed-off-by: Jan Kara --- fs/quota/dquot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 36a29b753c79..c495a3055e2a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) goto out; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) -- cgit v1.2.3 From 48d1788493f874e5d32dccb2911a7bc91c248b4b Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 2 Aug 2012 21:36:04 -0400 Subject: reiserfs: fix deadlocks with quotas The BKL push-down for reiserfs made lock recursion a special case that needs to be handled explicitly. One of the cases that was unhandled is dropping the quota during inode eviction. Both reiserfs_evict_inode and reiserfs_write_dquot take the write lock, but when the journal lock is taken it only drops one the references. The locking rules are that the journal lock be acquired before the write lock so leaving the reference open leads to a ABBA deadlock. This patch pushes the unlock up before clear_inode and avoids the recursive locking. Another ABBA situation can occur when the write lock is dropped while reading the bitmap buffer while in the quota code. When the lock is reacquired, it will deadlock against dquot->dq_lock and dqopt->dqio_mutex in the dquot_acquire path. It's safe to retain the lock across the read and should be cached under write load. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara --- fs/reiserfs/bitmap.c | 2 -- fs/reiserfs/inode.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 4c0c7d163d15..a98b7740a0fc 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, else if (bitmap == 0) block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; - reiserfs_write_unlock(sb); bh = sb_bread(sb, block); - reiserfs_write_lock(sb); if (bh == NULL) reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " "reading failed", __func__, block); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6d4268fb6c1..855da58db145 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode) ; } out: + reiserfs_write_unlock_once(inode->i_sb, depth); clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ dquot_drop(inode); inode->i_blocks = 0; - reiserfs_write_unlock_once(inode->i_sb, depth); return; no_delete: -- cgit v1.2.3 From adb37c4c67f807f16beb222028fb3ce9a354dc2b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 May 2012 18:01:20 -0600 Subject: userns: Make seq_file's user namespace accessible struct file already has a user namespace associated with it in file->f_cred->user_ns, unfortunately because struct seq_file has no struct file backpointer associated with it, it is difficult to get at the user namespace in seq_file context. Therefore add a helper function seq_user_ns to return the associated user namespace and a user_ns field to struct seq_file to be used in implementing seq_user_ns. Cc: Al Viro Cc: Eric Dumazet Cc: KAMEZAWA Hiroyuki Cc: Alexey Dobriyan Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/seq_file.c | 4 ++++ include/linux/seq_file.h | 14 ++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/seq_file.c b/fs/seq_file.c index 14cf9de1dbe1..99dffab4c4e4 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,9 @@ int seq_open(struct file *file, const struct seq_operations *op) memset(p, 0, sizeof(*p)); mutex_init(&p->lock); p->op = op; +#ifdef CONFIG_USER_NS + p->user_ns = file->f_cred->user_ns; +#endif /* * Wrappers around seq_open(e.g. swaps_open) need to be diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 83c44eefe698..68a04a343cad 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -13,6 +13,7 @@ struct file; struct path; struct inode; struct dentry; +struct user_namespace; struct seq_file { char *buf; @@ -25,6 +26,9 @@ struct seq_file { struct mutex lock; const struct seq_operations *op; int poll_event; +#ifdef CONFIG_USER_NS + struct user_namespace *user_ns; +#endif void *private; }; @@ -128,6 +132,16 @@ int seq_put_decimal_ull(struct seq_file *m, char delimiter, int seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num); +static inline struct user_namespace *seq_user_ns(struct seq_file *seq) +{ +#ifdef CONFIG_USER_NS + return seq->user_ns; +#else + extern struct user_namespace init_user_ns; + return &init_user_ns; +#endif +} + #define SEQ_START_TOKEN ((void *)1) /* * Helpers for iteration over list_head-s in seq_files -- cgit v1.2.3 From e68726ff72cf7ba5e7d789857fcd9a75ca573f03 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: canonicalize create mode in build_open_flags() Userspace can pass weird create mode in open(2) that we canonicalize to "(mode & S_IALLUGO) | S_IFREG" in vfs_create(). The problem is that we use the uncanonicalized mode before calling vfs_create() with unforseen consequences. So do the canonicalization early in build_open_flags(). Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones CC: stable@vger.kernel.org --- fs/open.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index bc132e167d2d..e1f2cdb91a4d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -852,9 +852,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode; - if (!(flags & O_CREAT)) - mode = 0; - op->mode = mode; + if (flags & O_CREAT) + op->mode = (mode & S_IALLUGO) | S_IFREG; + else + op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY; -- cgit v1.2.3 From 62b259d8b3ea9d4a73108fc599e40c863ec25ae6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: atomic_open(): fix create mode usage Don't mask S_ISREG off the create mode before passing to ->atomic_open(). Other methods (->create, ->mknod) also get the complete file mode and filesystems expect it. Reported-by: Steve Reported-by: Richard W.M. Jones Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 1b464390dde8..5bac1bb6e585 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2414,7 +2414,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto out; } - mode = op->mode & S_IALLUGO; + mode = op->mode; if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); -- cgit v1.2.3 From 38227f78a5020b3100cbb0406c89807563b10dae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: pass right create mode to may_o_create() Pass the umask-ed create mode to may_o_create() instead of the original one. Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 5bac1bb6e585..26c28ec4f4af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2452,7 +2452,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (open_flag & O_CREAT) { - error = may_o_create(&nd->path, dentry, op->mode); + error = may_o_create(&nd->path, dentry, mode); if (error) { create_error = error; if (open_flag & O_EXCL) -- cgit v1.2.3 From af109bca94a8a223c4632a4ff769b3419fe7ed8c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: fuse: check create mode in atomic open Verify that the VFS is passing us a complete create mode with the S_IFREG to atomic open. Reported-by: Steve Reported-by: Richard W.M. Jones Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones --- fs/fuse/dir.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8964cf3999b2..324bc0850534 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -383,6 +383,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_file *ff; + /* Userspace expects S_IFREG in create mode */ + BUG_ON((mode & S_IFMT) != S_IFREG); + forget = fuse_alloc_forget(); err = -ENOMEM; if (!forget) -- cgit v1.2.3 From 2e84f2641ea91a730642ead558a4ee3bd52310c9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Aug 2012 13:50:27 +0200 Subject: jbd: don't write superblock when unmounting an ro filesystem This sequence: results in an IO error when unmounting the RO filesystem. The bug was introduced by: commit 9754e39c7bc51328f145e933bfb0df47cd67b6e9 Author: Jan Kara Date: Sat Apr 7 12:33:03 2012 +0200 jbd: Split updating of journal superblock and marking journal empty which lost some of the magic in journal_update_superblock() which used to test for a journal with no outstanding transactions. This is a port of a jbd2 fix by Eric Sandeen. CC: # 3.4.x Signed-off-by: Jan Kara --- fs/jbd/journal.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 09357508ec9a..a2862339323b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal) BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); spin_lock(&journal->j_state_lock); + /* Is it already empty? */ + if (sb->s_start == 0) { + spin_unlock(&journal->j_state_lock); + return; + } jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", journal->j_tail_sequence); -- cgit v1.2.3 From 68766a2edcd5cd744262a70a2f67a320ac944760 Mon Sep 17 00:00:00 2001 From: Nikola Pajkovsky Date: Wed, 15 Aug 2012 00:38:08 +0200 Subject: udf: fix retun value on error path in udf_load_logicalvol In case we detect a problem and bail out, we fail to set "ret" to a nonzero value, and udf_load_logicalvol will mistakenly report success. Signed-off-by: Nikola Pajkovsky Signed-off-by: Jan Kara --- fs/udf/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 9f55f7981b7d..18fc038a438d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1344,6 +1344,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); + ret = 1; goto out_bh; } @@ -1388,8 +1389,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { if (udf_load_sparable_map(sb, map, - (struct sparablePartitionMap *)gpm) < 0) + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; goto out_bh; + } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { -- cgit v1.2.3 From 62b2ce964bb901f00a480104bd35a2e1f8d2cf58 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 15 Aug 2012 13:30:12 -0700 Subject: vfs: fix propagation of atomic_open create error on negative dentry If ->atomic_open() returns -ENOENT, we take care to return the create error (e.g., EACCES), if any. Do the same when ->atomic_open() returns 1 and provides a negative dentry. This fixes a regression where an unprivileged open O_CREAT fails with ENOENT instead of EACCES, introduced with the new atomic_open code. It is tested by the open/08.t test in the pjd posix test suite, and was observed on top of fuse (backed by ceph-fuse). Signed-off-by: Sage Weil Signed-off-by: Miklos Szeredi --- fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 26c28ec4f4af..db76b866a097 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2489,6 +2489,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, dput(dentry); dentry = file->f_path.dentry; } + if (create_error && dentry->d_inode == NULL) { + error = create_error; + goto out; + } goto looked_up; } -- cgit v1.2.3 From a76cccbeef9e91b5f799e8853acac1ed1fc833cb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 31 Jul 2012 14:55:51 +1000 Subject: xfs: fix uninitialised variable in xfs_rtbuf_get() Results in this assert failure in generic/090: XFS: Assertion failed: *nmap >= 1, file: fs/xfs/xfs_bmap.c, line: 4363 ..... Call Trace: [] xfs_bmapi_read+0x6b/0x370 [] xfs_rtbuf_get+0x42/0x130 [] xfs_rtget_summary+0x89/0x120 [] xfs_rtallocate_extent_size+0xce/0x340 [] xfs_rtallocate_extent+0x240/0x290 [] xfs_bmap_rtalloc+0x1ba/0x340 [] xfs_bmap_alloc+0x35/0x40 [] xfs_bmapi_allocate+0xf1/0x350 [] xfs_bmapi_write+0x66e/0xa60 [] xfs_iomap_write_direct+0x22a/0x3f0 [] __xfs_get_blocks+0x38b/0x5d0 [] xfs_get_blocks_direct+0x14/0x20 [] do_blockdev_direct_IO+0xf71/0x1eb0 [] __blockdev_direct_IO+0x55/0x60 [] xfs_vm_direct_IO+0x11a/0x1e0 [] generic_file_direct_write+0xd7/0x1b0 [] xfs_file_dio_aio_write+0x13c/0x320 [] xfs_file_aio_write+0x1c2/0x1d0 [] do_sync_write+0xa7/0xe0 [] vfs_write+0xa8/0x160 [] sys_pwrite64+0x92/0xb0 [] system_call_fastpath+0x16/0x1b Signed-off-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_rtalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 92d4331cd4f1..ca28a4ba4b54 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -857,7 +857,7 @@ xfs_rtbuf_get( xfs_buf_t *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; - int nmap; + int nmap = 1; int error; /* error value */ ip = issum ? mp->m_rsumip : mp->m_rbmip; -- cgit v1.2.3 From 1ed845df60f3f02d4b7cd9fcad79ccb69c289f5c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 1 Aug 2012 09:56:49 -0500 Subject: xfs: kill struct declarations in xfs_mount.h I noticed that "struct xfs_mount_args" was still declared in "fs/xfs/xfs_mount.h". That struct doesn't even exist any more (and is obviously not referenced elsewhere in that header file). While in there, delete four other unneeded struct declarations in that file. Doing so highlights that "fs/xfs/xfs_trace.h" was relying indirectly on "xfs_mount.h" to be #included in order to declare "struct xfs_bmbt_irec", so add that declaration to resolve that issue. Signed-off-by: Alex Elder Signed-off-by: Ben Myers --- fs/xfs/xfs_mount.h | 5 ----- fs/xfs/xfs_trace.h | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 05a05a7b6119..deee09e534dc 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations { #include "xfs_sync.h" struct xlog; -struct xfs_mount_args; struct xfs_inode; -struct xfs_bmbt_irec; -struct xfs_bmap_free; -struct xfs_extdelta; -struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e5795dd6013a..7d36ccf57f93 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -37,6 +37,7 @@ struct xlog_recover; struct xlog_recover_item; struct xfs_buf_log_format; struct xfs_inode_log_format; +struct xfs_bmbt_irec; DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), -- cgit v1.2.3 From 3cd52ab68b7f17eddbff46c1f8e5a105cd901f8e Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Thu, 9 Aug 2012 15:40:39 -0700 Subject: debugfs: make __create_file static It's only used locally, no need to pollute global namespace. Signed-off-by: Chris Wright Cc: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4733eab34a23..2c9fafbe8425 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -291,9 +291,9 @@ static struct file_system_type debug_fs_type = { .kill_sb = kill_litter_super, }; -struct dentry *__create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops) +static struct dentry *__create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) { struct dentry *dentry = NULL; int error; -- cgit v1.2.3 From 1ae811ee27912a0521e4b92dc9a1850c0243a247 Mon Sep 17 00:00:00 2001 From: "bjschuma@gmail.com" Date: Wed, 8 Aug 2012 13:57:06 -0400 Subject: NFS: Fix a regression when loading the NFS v4 module Some systems have a modprobe.d/nfs.conf file that sets an nfs4 alias pointing to nfs.ko, rather than nfs4.ko. This can prevent the v4 module from loading on mount, since the kernel sees that something named "nfs4" has already been loaded. To work around this, I've renamed the modules to "nfsv2.ko" "nfsv3.ko" and "nfsv4.ko". I also had to move the nfs4_fs_type back to nfs.ko to ensure that `mount -t nfs4` still works. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 18 +++++++++--------- fs/nfs/client.c | 2 +- fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/nfs4super.c | 15 --------------- fs/nfs/super.c | 37 ++++++++++++++++++++++++++++++++++++- 5 files changed, 49 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8bf3a3f6925a..b7db60897f91 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o -obj-$(CONFIG_NFS_V2) += nfs2.o -nfs2-y := nfs2super.o proc.o nfs2xdr.o +obj-$(CONFIG_NFS_V2) += nfsv2.o +nfsv2-y := nfs2super.o proc.o nfs2xdr.o -obj-$(CONFIG_NFS_V3) += nfs3.o -nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o -nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o +obj-$(CONFIG_NFS_V3) += nfsv3.o +nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o -obj-$(CONFIG_NFS_V4) += nfs4.o -nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ +obj-$(CONFIG_NFS_V4) += nfsv4.o +nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs4getroot.o nfs4client.o -nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9fc0d9dfc91b..99694442b93f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version) if (IS_ERR(nfs)) { mutex_lock(&nfs_version_mutex); - request_module("nfs%d", version); + request_module("nfsv%d", version); nfs = find_nfs_version(version); mutex_unlock(&nfs_version_mutex); } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3b950dd81e81..da0618aeeadb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); +/* super.c */ +extern struct file_system_type nfs4_fs_type; + /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 12a31a9dbcdd..bd61221ad2c5 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -344,14 +336,8 @@ static int __init init_nfs_v4(void) if (err) goto out1; - err = register_filesystem(&nfs4_fs_type); - if (err < 0) - goto out2; - register_nfs_version(&nfs_v4); return 0; -out2: - nfs4_unregister_sysctl(); out1: nfs_idmap_quit(); out: @@ -361,7 +347,6 @@ out: static void __exit exit_nfs_v4(void) { unregister_nfs_version(&nfs_v4); - unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ac6a3c55dce4..c4a15c55519c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops); static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); + +struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs_fs_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; +EXPORT_SYMBOL_GPL(nfs4_fs_type); + +static int __init register_nfs4_fs(void) +{ + return register_filesystem(&nfs4_fs_type); +} + +static void unregister_nfs4_fs(void) +{ + unregister_filesystem(&nfs4_fs_type); +} +#else +static int __init register_nfs4_fs(void) +{ + return 0; +} + +static void unregister_nfs4_fs(void) +{ +} #endif static struct shrinker acl_shrinker = { @@ -337,12 +365,18 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_0; - ret = nfs_register_sysctl(); + ret = register_nfs4_fs(); if (ret < 0) goto error_1; + + ret = nfs_register_sysctl(); + if (ret < 0) + goto error_2; register_shrinker(&acl_shrinker); return 0; +error_2: + unregister_nfs4_fs(); error_1: unregister_filesystem(&nfs_fs_type); error_0: @@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void) { unregister_shrinker(&acl_shrinker); nfs_unregister_sysctl(); + unregister_nfs4_fs(); unregister_filesystem(&nfs_fs_type); } -- cgit v1.2.3 From 425e776d93a7a5070b77d4f458a5bab0f924652c Mon Sep 17 00:00:00 2001 From: "bjschuma@gmail.com" Date: Wed, 8 Aug 2012 13:57:10 -0400 Subject: NFS: Alias the nfs module to nfs4 This allows distros to remove the line from their modprobe configuration. Signed-off-by: Bryan Schumaker Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c4a15c55519c..239aff7338eb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2680,4 +2680,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); +MODULE_ALIAS("nfs4"); + #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From 519d3959e30a98f8e135e7a16647c10af5ad63d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 17:30:10 -0400 Subject: NFSv4: Fix pointer arithmetic in decode_getacl Resetting the cursor xdr->p to a previous value is not a safe practice: if the xdr_stream has crossed out of the initial iovec, then a bunch of other fields would need to be reset too. Fix this issue by using xdr_enter_page() so that the buffer gets page aligned at the bitmap _before_ we decode it. Also fix the confusion of the ACL length with the page buffer length by not adding the base offset to the ACL length... Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 21 +++++++-------------- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c77d296bdaa6..286ab7078413 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3819,7 +3819,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len - res.acl_data_offset; + acl_len = res.acl_len; if (acl_len > args.acl_len) nfs4_write_cached_acl(inode, NULL, 0, acl_len); else diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ca13483edd60..54d3f5a9faa6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5049,18 +5049,14 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, uint32_t attrlen, bitmap[3] = {0}; int status; - size_t page_len = xdr->buf->page_len; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + xdr_enter_page(xdr, xdr->buf->page_len); + bm_p = xdr->p; - res->acl_data_offset = be32_to_cpup(bm_p) + 2; - res->acl_data_offset <<= 2; - /* Check if the acl data starts beyond the allocated buffer */ - if (res->acl_data_offset > page_len) - return -ERANGE; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5074,23 +5070,20 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - xdr->p = bm_p; + res->acl_data_offset = (xdr->p - bm_p) << 2; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ - attrlen += res->acl_data_offset; - if (attrlen > page_len) { + res->acl_len = attrlen; + if (attrlen + res->acl_data_offset > xdr->buf->page_len) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ - res->acl_len = attrlen; goto out; } - dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", - attrlen, page_len); + dprintk("NFS: acl reply: attrlen %u > page_len %u\n", + attrlen, xdr->buf->page_len); return -EINVAL; } - xdr_read_pages(xdr, attrlen); - res->acl_len = attrlen; } else status = -EOPNOTSUPP; -- cgit v1.2.3 From b291f1b1c86aa0c7bc3df2994e6a1a4e53f1fde0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 18:30:41 -0400 Subject: NFSv4: Fix the acl cache size calculation Currently, we do not take into account the size of the 16 byte struct nfs4_cached_acl header, when deciding whether or not we should cache the acl data. Consequently, we will end up allocating an 8k buffer in order to fit a maximum size 4k acl. This patch adjusts the calculation so that we limit the cache size to 4k for the acl header+data. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 286ab7078413..635274140b18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3737,9 +3737,10 @@ out: static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) { struct nfs4_cached_acl *acl; + size_t buflen = sizeof(*acl) + acl_len; - if (pages && acl_len <= PAGE_SIZE) { - acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (pages && buflen <= PAGE_SIZE) { + acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; acl->cached = 1; -- cgit v1.2.3 From cff298c721099c9ac4cea7196a37097ba2847946 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 17:14:17 -0400 Subject: NFSv4: Don't use private xdr_stream fields in decode_getacl Instead of using the private field xdr->p from struct xdr_stream, use the public xdr_stream_pos(). Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 54d3f5a9faa6..1bfbd67c556d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5045,10 +5045,10 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_getaclres *res) { unsigned int savep; - __be32 *bm_p; uint32_t attrlen, bitmap[3] = {0}; int status; + unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -5056,7 +5056,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, xdr_enter_page(xdr, xdr->buf->page_len); - bm_p = xdr->p; + /* Calculate the offset of the page data */ + pg_offset = xdr->buf->head[0].iov_len; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5070,18 +5071,18 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - res->acl_data_offset = (xdr->p - bm_p) << 2; + res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ res->acl_len = attrlen; - if (attrlen + res->acl_data_offset > xdr->buf->page_len) { + if (attrlen > (xdr->nwords << 2)) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ goto out; } dprintk("NFS: acl reply: attrlen %u > page_len %u\n", - attrlen, xdr->buf->page_len); + attrlen, xdr->nwords << 2); return -EINVAL; } } else -- cgit v1.2.3 From c5066945b7ea346a11424dbeb7830b7d7d00c206 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 9 Aug 2012 14:05:49 -0400 Subject: NFS: Clear key construction data if the idmap upcall fails idmap_pipe_downcall already clears this field if the upcall succeeds, but if it fails (rpc.idmapd isn't running) the field will still be set on the next call triggering a BUG_ON(). This patch tries to handle all possible ways that the upcall could fail and clear the idmap key data for each one. Signed-off-by: Bryan Schumaker Tested-by: William Dauchy Cc: stable@vger.kernel.org [>= 3.4] Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b701358c39c3..6703c73307a5 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -61,6 +61,12 @@ struct idmap { struct mutex idmap_mutex; }; +struct idmap_legacy_upcalldata { + struct rpc_pipe_msg pipe_msg; + struct idmap_msg idmap_msg; + struct idmap *idmap; +}; + /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields * @fattr: fully initialised struct nfs_fattr @@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, name, namelen, type, data, data_size, idmap); + idmap->idmap_key_cons = NULL; mutex_unlock(&idmap->idmap_mutex); } return ret; @@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = { static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); +static void idmap_release_pipe(struct inode *); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, + .release_pipe = idmap_release_pipe, .destroy_msg = idmap_pipe_destroy_msg, }; @@ -616,7 +625,8 @@ void nfs_idmap_quit(void) nfs_idmap_quit_keyring(); } -static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, +static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, + struct idmap_msg *im, struct rpc_pipe_msg *msg) { substring_t substr; @@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, const char *op, void *aux) { + struct idmap_legacy_upcalldata *data; struct rpc_pipe_msg *msg; struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; @@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, int ret = -ENOMEM; /* msg and im are freed in idmap_pipe_destroy_msg */ - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - goto out0; - - im = kmalloc(sizeof(*im), GFP_KERNEL); - if (!im) + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) goto out1; - ret = nfs_idmap_prepare_message(key->description, im, msg); + msg = &data->pipe_msg; + im = &data->idmap_msg; + data->idmap = idmap; + + ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) goto out2; @@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - goto out2; + goto out3; return ret; +out3: + idmap->idmap_key_cons = NULL; out2: - kfree(im); + kfree(data); out1: - kfree(msg); -out0: complete_request_key(cons, ret); return ret; } @@ -775,9 +786,26 @@ out_incomplete: static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { + struct idmap_legacy_upcalldata *data = container_of(msg, + struct idmap_legacy_upcalldata, + pipe_msg); + struct idmap *idmap = data->idmap; + struct key_construction *cons; + if (msg->errno) { + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + complete_request_key(cons, msg->errno); + } /* Free memory allocated in nfs_idmap_legacy_upcall() */ - kfree(msg->data); - kfree(msg); + kfree(data); +} + +static void +idmap_release_pipe(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct idmap *idmap = (struct idmap *)rpci->private; + idmap->idmap_key_cons = NULL; } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) -- cgit v1.2.3 From 12dfd080556124088ed61a292184947711b46cbe Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 9 Aug 2012 14:05:50 -0400 Subject: NFS: return -ENOKEY when the upcall fails to map the name This allows the normal error-paths to handle the error, rather than making a special call to complete_request_key() just for this instance. Signed-off-by: Bryan Schumaker Tested-by: William Dauchy Cc: stable@vger.kernel.org [>= 3.4] Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 6703c73307a5..a850079467d8 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -760,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { - ret = mlen; - complete_request_key(cons, -ENOKEY); - goto out_incomplete; + ret = -ENOKEY; + goto out; } namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); @@ -779,7 +778,6 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) out: complete_request_key(cons, ret); -out_incomplete: return ret; } -- cgit v1.2.3 From c4982110ae93d7575503feb81d15e93c0c5f393c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Aug 2012 02:02:02 -0400 Subject: xfs: unlock the AGI buffer when looping in xfs_dialloc Also update some commens in the area to make the code easier to read. Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 21e37b55f7e5..5aceb3f8ecd6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -962,23 +962,22 @@ xfs_dialloc( if (!pag->pagi_freecount && !okalloc) goto nextag; + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. + */ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) goto out_error; - /* - * Once the AGI has been read in we have to recheck - * pagi_freecount with the AGI buffer lock held. - */ if (pag->pagi_freecount) { xfs_perag_put(pag); goto out_alloc; } - if (!okalloc) { - xfs_trans_brelse(tp, agbp); - goto nextag; - } + if (!okalloc) + goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); if (error) { @@ -1007,6 +1006,8 @@ xfs_dialloc( return 0; } +nextag_relse_buffer: + xfs_trans_brelse(tp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) -- cgit v1.2.3 From 643bfc061c47e9c7661324a09fb0a0bc6601e5d6 Mon Sep 17 00:00:00 2001 From: Tomas Racek Date: Tue, 14 Aug 2012 10:35:04 +0200 Subject: xfs: check for possible overflow in xfs_ioc_trim If range.start or range.minlen is bigger than filesystem size, return invalid value error. This fixes possible overflow in BTOBB macro when passed value was nearly ULLONG_MAX. Signed-off-by: Tomas Racek Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_discard.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index f9c3fe304a17..69cf4fcde03e 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -179,12 +179,14 @@ xfs_ioc_trim( * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. */ + if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || + range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) + return -XFS_ERROR(EINVAL); + start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; minlen = BTOBB(max_t(u64, granularity, range.minlen)); - if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) - return -XFS_ERROR(EINVAL); if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; -- cgit v1.2.3 From 7a4c5de27efa4c2ecca87af0a3deea63446367e2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 10 Aug 2012 13:57:52 -0400 Subject: ext4: don't call ext4_error while block group is locked While in ext4_validate_block_bitmap(), if an block allocation bitmap is found to be invalid, we call ext4_error() while the block group is still locked. This causes ext4_commit_super() to call a function which might sleep while in an atomic context. There's no need to keep the block group locked at this point, so hoist the ext4_error() call up to ext4_validate_block_bitmap() and release the block group spinlock before calling ext4_error(). The reported stack trace can be found at: http://article.gmane.org/gmane.comp.file-systems.ext4/33731 Reported-by: Dave Jones Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/balloc.c | 62 +++++++++++++++++++++++++++++++++----------------------- fs/ext4/bitmap.c | 1 - 2 files changed, 37 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d23b31ca9d7a..1b5089067d01 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, return desc; } -static int ext4_valid_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) +/* + * Return the block number which was discovered to be invalid, or 0 if + * the block bitmap is valid. + */ +static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) { ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; - ext4_fsblk_t bitmap_blk; + ext4_fsblk_t blk; ext4_fsblk_t group_first_block; if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { @@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb, * or it has to also read the block group where the bitmaps * are located to verify they are set. */ - return 1; + return 0; } group_first_block = ext4_group_first_block_no(sb, block_group); /* check whether block bitmap block number is set */ - bitmap_blk = ext4_block_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_block_bitmap(sb, desc); + offset = blk - group_first_block; if (!ext4_test_bit(offset, bh->b_data)) /* bad block bitmap */ - goto err_out; + return blk; /* check whether the inode bitmap block number is set */ - bitmap_blk = ext4_inode_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_inode_bitmap(sb, desc); + offset = blk - group_first_block; if (!ext4_test_bit(offset, bh->b_data)) /* bad block bitmap */ - goto err_out; + return blk; /* check whether the inode table block number is set */ - bitmap_blk = ext4_inode_table(sb, desc); - offset = bitmap_blk - group_first_block; + blk = ext4_inode_table(sb, desc); + offset = blk - group_first_block; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, offset + EXT4_SB(sb)->s_itb_per_group, offset); - if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) - /* good bitmap for inode tables */ - return 1; - -err_out: - ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", - block_group, bitmap_blk); + if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group) + /* bad bitmap for inode tables */ + return blk; return 0; } @@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb, unsigned int block_group, struct buffer_head *bh) { + ext4_fsblk_t blk; + if (buffer_verified(bh)) return; ext4_lock_group(sb, block_group); - if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && - ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8)) - set_buffer_verified(bh); + blk = ext4_valid_block_bitmap(sb, desc, block_group, bh); + if (unlikely(blk != 0)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: block %llu: invalid block bitmap", + block_group, blk); + return; + } + if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, + desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); + return; + } + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); } diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index f8716eab9995..5c2d1813ebe9 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -79,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, if (provided == calculated) return 1; - ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); return 0; } -- cgit v1.2.3 From 0548bbb85337e532ca2ed697c3e9b227ff2ed4b4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 16 Aug 2012 11:59:04 -0400 Subject: ext4: fix long mount times on very big file systems Commit 8aeb00ff85a: "ext4: fix overhead calculation used by ext4_statfs()" introduced a O(n**2) calculation which makes very large file systems take forever to mount. Fix this with an optimization for non-bigalloc file systems. (For bigalloc file systems the overhead needs to be set in the the superblock.) Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 56bcaec9149c..598498904035 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3120,6 +3120,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + + sbi->s_itb_per_group + 2); + first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + (grp * EXT4_BLOCKS_PER_GROUP(sb)); last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; -- cgit v1.2.3 From 89a4e48f8479f8145eca9698f39fe188c982212f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 08:54:52 -0400 Subject: ext4: fix kernel BUG on large-scale rm -rf commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 968dee7722: "ext4: fix hole punch failure when depth is greater than 0" introduced a regression in v3.5.1/v3.6-rc1 which caused kernel crashes when users ran run "rm -rf" on large directory hierarchy on ext4 filesystems on RAID devices: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 Process rm (pid: 18229, threadinfo ffff8801276bc000, task ffff880123631710) Call Trace: [] ? __ext4_handle_dirty_metadata+0x83/0x110 [] ext4_ext_truncate+0x193/0x1d0 [] ? ext4_mark_inode_dirty+0x7f/0x1f0 [] ext4_truncate+0xf5/0x100 [] ext4_evict_inode+0x461/0x490 [] evict+0xa2/0x1a0 [] iput+0x103/0x1f0 [] do_unlinkat+0x154/0x1c0 [] ? sys_newfstatat+0x2a/0x40 [] sys_unlinkat+0x1b/0x50 [] system_call_fastpath+0x16/0x1b Code: 8b 4d 20 0f b7 41 02 48 8d 04 40 48 8d 04 81 49 89 45 18 0f b7 49 02 48 83 c1 01 49 89 4d 00 e9 ae f8 ff ff 0f 1f 00 49 8b 45 28 <48> 8b 40 28 49 89 45 20 e9 85 f8 ff ff 0f 1f 80 00 00 00 RIP [] ext4_ext_remove_space+0xa34/0xdf0 This could be reproduced as follows: The problem in commit 968dee7722 was that caused the variable 'i' to be left uninitialized if the truncate required more space than was available in the journal. This resulted in the function ext4_ext_truncate_extend_restart() returning -EAGAIN, which caused ext4_ext_remove_space() to restart the truncate operation after starting a new jbd2 handle. Reported-by: Maciej Żenczykowski Reported-by: Marti Raudsepp Tested-by: Fengguang Wu Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cd0c7ed06772..aabbb3f53683 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2662,6 +2662,7 @@ cont: } path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); + i = 0; if (ext4_ext_check(inode, path[0].p_hdr, depth)) { err = -EIO; -- cgit v1.2.3 From ecb94f5fdf4b72547fca022421a9dca1672bddd4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 09:44:17 -0400 Subject: ext4: collapse a single extent tree block into the inode if possible If an inode has more than 4 extents, but then later some of the extents are merged together, we can optimize the file system by moving the extents up into the inode, and discarding the extent tree block. This is important, because if there are a large number of inodes with an external extent tree blocks where the contents could fit in the inode, this can significantly increase the fsck time of the file system. Google-Bug-Id: 6801242 Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 72 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aabbb3f53683..e8755c21f4b9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1655,17 +1655,61 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, return merge_done; } +/* + * This function does a very simple check to see if we can collapse + * an extent tree with a single extent tree leaf block into the inode. + */ +static void ext4_ext_try_to_merge_up(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path) +{ + size_t s; + unsigned max_root = ext4_ext_space_root(inode, 0); + ext4_fsblk_t blk; + + if ((path[0].p_depth != 1) || + (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || + (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) + return; + + /* + * We need to modify the block allocation bitmap and the block + * group descriptor to release the extent tree block. If we + * can't get the journal credits, give up. + */ + if (ext4_journal_extend(handle, 2)) + return; + + /* + * Copy the extent data up to the inode + */ + blk = ext4_idx_pblock(path[0].p_idx); + s = le16_to_cpu(path[1].p_hdr->eh_entries) * + sizeof(struct ext4_extent_idx); + s += sizeof(struct ext4_extent_header); + + memcpy(path[0].p_hdr, path[1].p_hdr, s); + path[0].p_depth = 0; + path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + + (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); + path[0].p_hdr->eh_max = cpu_to_le16(max_root); + + brelse(path[1].p_bh); + ext4_free_blocks(handle, inode, NULL, blk, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); +} + /* * This function tries to merge the @ex extent to neighbours in the tree. * return 1 if merge left else 0. */ -static int ext4_ext_try_to_merge(struct inode *inode, +static void ext4_ext_try_to_merge(handle_t *handle, + struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex) { struct ext4_extent_header *eh; unsigned int depth; int merge_done = 0; - int ret = 0; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); @@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode, merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); if (!merge_done) - ret = ext4_ext_try_to_merge_right(inode, path, ex); + (void) ext4_ext_try_to_merge_right(inode, path, ex); - return ret; + ext4_ext_try_to_merge_up(handle, inode, path); } /* @@ -1893,7 +1937,7 @@ has_space: merge: /* try to merge extents */ if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) - ext4_ext_try_to_merge(inode, path, nearex); + ext4_ext_try_to_merge(handle, inode, path, nearex); /* time to correct all indexes above */ @@ -1901,7 +1945,7 @@ merge: if (err) goto cleanup; - err = ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + path->p_depth); cleanup: if (npath) { @@ -2924,9 +2968,9 @@ static int ext4_split_extent_at(handle_t *handle, ext4_ext_mark_initialized(ex); if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) - ext4_ext_try_to_merge(inode, path, ex); + ext4_ext_try_to_merge(handle, inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + path->p_depth); goto out; } @@ -2958,8 +3002,8 @@ static int ext4_split_extent_at(handle_t *handle, goto fix_extent_len; /* update the extent length and mark as initialized */ ex->ee_len = cpu_to_le16(ee_len); - ext4_ext_try_to_merge(inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + depth); + ext4_ext_try_to_merge(handle, inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + path->p_depth); goto out; } else if (err) goto fix_extent_len; @@ -3191,8 +3235,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (err) goto out; ext4_ext_mark_initialized(ex); - ext4_ext_try_to_merge(inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + depth); + ext4_ext_try_to_merge(handle, inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + path->p_depth); goto out; } @@ -3333,10 +3377,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, /* note: ext4_ext_correct_indexes() isn't needed here because * borders are not changed */ - ext4_ext_try_to_merge(inode, path, ex); + ext4_ext_try_to_merge(handle, inode, path, ex); /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + path->p_depth); out: ext4_ext_show_leaf(inode, path); return err; -- cgit v1.2.3 From 01fc48e8929e45e67527200017cff4e74e4ba054 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 09:46:17 -0400 Subject: ext4: don't load the block bitmap for block groups which have no space Add a short circuit check to ext4_mb_group_group() so that we don't bother to load the block bitmap for a block group which does not have any space available. (Or which does not have enough space until we are in desperation mode, i.e., when cr == 3.) Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=45741 Reported-by: mirek@me.com Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8eae94771c45..3a57975b73cc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1862,6 +1862,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, BUG_ON(cr < 0 || cr >= 4); + free = grp->bb_free; + if (free == 0) + return 0; + if (cr <= 2 && free < ac->ac_g_ex.fe_len) + return 0; + /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { int ret = ext4_mb_init_group(ac->ac_sb, group); @@ -1869,10 +1875,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, return 0; } - free = grp->bb_free; fragments = grp->bb_fragments; - if (free == 0) - return 0; if (fragments == 0) return 0; -- cgit v1.2.3 From df981d03eeff7971ac7e6ff37000bfa702327ef1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 09:48:17 -0400 Subject: ext4: add max_dir_size_kb mount option Very large directories can cause significant performance problems, or perhaps even invoke the OOM killer, if the process is running in a highly constrained memory environment (whether it is VM's with a small amount of memory or in a small memory cgroup). So it is useful, in cloud server/data center environments, to be able to set a filesystem-wide cap on the maximum size of a directory, to ensure that directories never get larger than a sane size. We do this via a new mount option, max_dir_size_kb. If there is an attempt to grow the directory larger than max_dir_size_kb, the system call will return ENOSPC instead. Google-Bug-Id: 6863013 Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 10 ++++++++++ fs/ext4/ext4.h | 1 + fs/ext4/namei.c | 7 +++++++ fs/ext4/super.c | 7 +++++++ 4 files changed, 25 insertions(+) (limited to 'fs') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 1b7f9acbcbbe..104322bf378c 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified Because of the restrictions this options comprises it is off by default (e.g. dioread_lock). +max_dir_size_kb=n This limits the size of directories so that any + attempt to expand them beyond the specified + limit in kilobytes will cause an ENOSPC error. + This is useful in memory constrained + environments, where a very large directory can + cause severe performance problems or even + provoke the Out Of Memory killer. (For example, + if there is only 512mb memory available, a 176mb + directory may seriously cramp the system's style.) + i_version Enable 64-bit inode version support. This option is off by default. diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c3411d4ce2da..7c0841ecde6c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1243,6 +1243,7 @@ struct ext4_sb_info { unsigned int s_mb_order2_reqs; unsigned int s_mb_group_prealloc; unsigned int s_max_writeback_mb_bump; + unsigned int s_max_dir_size_kb; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; unsigned long s_mb_last_start; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2a42cc04466f..7450ff01c3c4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle, { struct buffer_head *bh; + if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && + ((inode->i_size >> 10) >= + EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) { + *err = -ENOSPC; + return NULL; + } + *block = inode->i_size >> inode->i_sb->s_blocksize_bits; bh = ext4_bread(handle, inode, *block, 1, err); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 598498904035..5a97e590692d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1230,6 +1230,7 @@ enum { Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, + Opt_max_dir_size_kb, }; static const match_table_t tokens = { @@ -1303,6 +1304,7 @@ static const match_table_t tokens = { {Opt_init_itable, "init_itable=%u"}, {Opt_init_itable, "init_itable"}, {Opt_noinit_itable, "noinit_itable"}, + {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, {Opt_removed, "check=none"}, /* mount option from ext2/3 */ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ {Opt_removed, "reservation"}, /* mount option from ext2/3 */ @@ -1483,6 +1485,7 @@ static const struct mount_opts { {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, + {Opt_max_dir_size_kb, 0, MOPT_GTE0}, {Opt_err, 0, 0} }; @@ -1598,6 +1601,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, if (!args->from) arg = EXT4_DEF_LI_WAIT_MULT; sbi->s_li_wait_mult = arg; + } else if (token == Opt_max_dir_size_kb) { + sbi->s_max_dir_size_kb = arg; } else if (token == Opt_stripe) { sbi->s_stripe = arg; } else if (m->flags & MOPT_DATAJ) { @@ -1829,6 +1834,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); + if (nodefs || sbi->s_max_dir_size_kb) + SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); ext4_show_quota_options(seq, sb); return 0; -- cgit v1.2.3 From 67a5da564f97f31c4054d358e00b34d7ee570da5 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Fri, 17 Aug 2012 09:54:17 -0400 Subject: ext4: make the zero-out chunk size tunable Currently in ext4 the length of zero-out chunk is set to 7 file system blocks. But if an inode has uninitailized extents from using fallocate to preallocate space, and the workload issues many random writes, this can cause a fragmented extent tree that will unnecessarily grow the extent tree. So create a new sysfs tunable, extent_max_zeroout_kb, which controls the maximum size where blocks will be zeroed out instead of creating a new uninitialized extent. The default of this has been sent to 32kb. CC: Zach Brown CC: Andreas Dilger Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- Documentation/ABI/testing/sysfs-fs-ext4 | 13 +++++++++++++ fs/ext4/ext4.h | 3 +++ fs/ext4/extents.c | 25 +++++++++++++------------ fs/ext4/super.c | 3 +++ 4 files changed, 32 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4 index f22ac0872ae8..c631253cf85c 100644 --- a/Documentation/ABI/testing/sysfs-fs-ext4 +++ b/Documentation/ABI/testing/sysfs-fs-ext4 @@ -96,3 +96,16 @@ Contact: "Theodore Ts'o" Description: The maximum number of megabytes the writeback code will try to write out before move on to another inode. + +What: /sys/fs/ext4//extent_max_zeroout_kb +Date: August 2012 +Contact: "Theodore Ts'o" +Description: + The maximum number of kilobytes which will be zeroed + out in preference to creating a new uninitialized + extent when manipulating an inode's extent tree. Note + that using a larger value will increase the + variability of time necessary to complete a random + write operation (since a 4k random write might turn + into a much larger write due to the zeroout + operation). diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 7c0841ecde6c..0df5ee102b61 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1271,6 +1271,9 @@ struct ext4_sb_info { unsigned long s_sectors_written_start; u64 s_kbytes_written; + /* the size of zero-out chunk */ + unsigned int s_extent_max_zeroout_kb; + unsigned int s_log_groups_per_flex; struct flex_groups *s_flex_groups; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e8755c21f4b9..2f082abf4992 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3085,7 +3085,6 @@ out: return err ? err : map->m_len; } -#define EXT4_EXT_ZERO_LEN 7 /* * This function is called by ext4_ext_map_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized @@ -3111,13 +3110,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_map_blocks *map, struct ext4_ext_path *path) { + struct ext4_sb_info *sbi; struct ext4_extent_header *eh; struct ext4_map_blocks split_map; struct ext4_extent zero_ex; struct ext4_extent *ex; ext4_lblk_t ee_block, eof_block; unsigned int ee_len, depth; - int allocated; + int allocated, max_zeroout = 0; int err = 0; int split_flag = 0; @@ -3125,6 +3125,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, "block %llu, max_blocks %u\n", inode->i_ino, (unsigned long long)map->m_lblk, map->m_len); + sbi = EXT4_SB(inode->i_sb); eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) @@ -3224,9 +3225,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; - /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ - if (ee_len <= 2*EXT4_EXT_ZERO_LEN && - (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + if (EXT4_EXT_MAY_ZEROOUT & split_flag) + max_zeroout = sbi->s_extent_max_zeroout_kb >> + inode->i_sb->s_blocksize_bits; + + /* If extent is less than s_max_zeroout_kb, zeroout directly */ + if (max_zeroout && (ee_len <= max_zeroout)) { err = ext4_ext_zeroout(inode, ex); if (err) goto out; @@ -3250,9 +3254,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, split_map.m_lblk = map->m_lblk; split_map.m_len = map->m_len; - if (allocated > map->m_len) { - if (allocated <= EXT4_EXT_ZERO_LEN && - (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + if (max_zeroout && (allocated > map->m_len)) { + if (allocated <= max_zeroout) { /* case 3 */ zero_ex.ee_block = cpu_to_le32(map->m_lblk); @@ -3264,9 +3267,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; split_map.m_lblk = map->m_lblk; split_map.m_len = allocated; - } else if ((map->m_lblk - ee_block + map->m_len < - EXT4_EXT_ZERO_LEN) && - (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { /* case 2 */ if (map->m_lblk != ee_block) { zero_ex.ee_block = ex->ee_block; @@ -3286,7 +3287,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, } allocated = ext4_split_extent(handle, inode, path, - &split_map, split_flag, 0); + &split_map, split_flag, 0); if (allocated < 0) err = allocated; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5a97e590692d..0423e2e7f615 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2541,6 +2541,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); +EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); static struct attribute *ext4_attrs[] = { @@ -2556,6 +2557,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(mb_stream_req), ATTR_LIST(mb_group_prealloc), ATTR_LIST(max_writeback_mb_bump), + ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(trigger_fs_error), NULL, }; @@ -3756,6 +3758,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_stripe = ext4_get_stripe_size(sbi); sbi->s_max_writeback_mb_bump = 128; + sbi->s_extent_max_zeroout_kb = 32; /* * set up enough so that it can read an inode -- cgit v1.2.3 From 316e4cfd0b0b4ce846fd0fbda2deebcffbd3e440 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 09:56:17 -0400 Subject: jbd2: check return value of blkdev_issue_flush() blkdev_issue_flush() can fail; make sure the error gets properly propagated. This is a port of the equivalent jbd patch from commit 349ecd6a3c0e. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/recovery.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 0131e4362534..626846bac32f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -289,8 +289,11 @@ int jbd2_journal_recover(journal_t *journal) if (!err) err = err2; /* Make sure all replayed data is on permanent storage */ - if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + if (journal->j_flags & JBD2_BARRIER) { + err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + if (!err) + err = err2; + } return err; } -- cgit v1.2.3 From d807ff838f48e7778996e577e2a57a5796c32e84 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 17 Aug 2012 11:09:04 +0800 Subject: autofs4 - fix expire check In some cases when an autofs indirect mount is contained in a file system that is marked as shared (such as when systemd does the equivalent of "mount --make-rshared /" early in the boot), mounts stop expiring. When this happens the first expiry check on a mountpoint dentry in autofs_expire_indirect() sees a mountpoint dentry with a higher than minimal reference count. Consequently the dentry is condidered busy and the actual expiry check is never done. This particular check was originally meant as an optimisation to detect a path walk in progress but with the addition of rcu-walk it can be ineffective anyway. Removing the test allows automounts to expire again since the actual expire check doesn't rely on the dentry reference count. Signed-off-by: Ian Kent Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 8c0e56d92938..842d00048a65 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -399,11 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - /* Path walk currently on this dentry? */ - ino_count = atomic_read(&ino->count) + 2; - if (dentry->d_count > ino_count) - goto next; - /* Can we umount this guy */ if (autofs4_mount_busy(mnt, dentry)) goto next; -- cgit v1.2.3 From a4a39040e9dacb5fa28b7ee7f842237ee534e7bf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 09:58:17 -0400 Subject: ext4: check return value of blkdev_issue_flush() blkdev_issue_flush() can fail; make sure the error gets properly propagated. This is a port of the equivalent ext3 patch from commit 44f4f729e7a1. Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2a1dcea4f12e..323eb15c2d94 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -203,7 +203,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - int ret; + int ret, err; tid_t commit_tid; bool needs_barrier = false; @@ -255,8 +255,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) needs_barrier = true; jbd2_log_start_commit(journal, commit_tid); ret = jbd2_log_wait_commit(journal, commit_tid); - if (needs_barrier) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (needs_barrier) { + err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (!ret) + ret = err; + } out: mutex_unlock(&inode->i_mutex); trace_ext4_sync_file_exit(inode, ret); -- cgit v1.2.3 From cc6eb18d68fb52a7de65b7a318461ca600240177 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Fri, 17 Aug 2012 10:00:17 -0400 Subject: ext4: remove unused macro MB_DEFAULT_MAX_GROUPS_TO_SCAN Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c070618c21ce..3ccd889ba953 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -64,11 +64,6 @@ extern u8 mb_enable_debug; */ #define MB_DEFAULT_MIN_TO_SCAN 10 -/* - * How many groups mballoc will scan looking for the best chunk - */ -#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 - /* * with 'ext4_mb_stats' allocator will collect stats that will be * shown at umount. The collecting costs though! -- cgit v1.2.3 From 15c006a22f8e004afbce42a54c878162355f1587 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Fri, 17 Aug 2012 10:02:17 -0400 Subject: ext4: remove unused function argument 'order' in mb_find_extent() All the routines call mb_find_extent are setting argument 'order' to 0 just like: mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); therefore the useless argument should be removed. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3a57975b73cc..6873571c9f44 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1338,17 +1338,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); } -static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, +static int mb_find_extent(struct ext4_buddy *e4b, int block, int needed, struct ext4_free_extent *ex) { int next = block; - int max; + int max, order; void *buddy; assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); BUG_ON(ex == NULL); - buddy = mb_find_buddy(e4b, order, &max); + buddy = mb_find_buddy(e4b, 0, &max); BUG_ON(buddy == NULL); BUG_ON(block >= max); if (mb_test_bit(block, buddy)) { @@ -1358,12 +1358,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, return 0; } - /* FIXME dorp order completely ? */ - if (likely(order == 0)) { - /* find actual order */ - order = mb_find_order_for_block(e4b, block); - block = block >> order; - } + /* find actual order */ + order = mb_find_order_for_block(e4b, block); + block = block >> order; ex->fe_len = 1 << order; ex->fe_start = block << order; @@ -1549,7 +1546,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, /* recheck chunk's availability - we don't know * when it was found (within this lock-unlock * period or not) */ - max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); + max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex); if (max >= gex->fe_len) { ext4_mb_use_best_found(ac, e4b); return; @@ -1641,7 +1638,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); - max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { ac->ac_b_ex = ex; @@ -1672,7 +1669,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); - max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start, + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { @@ -1788,7 +1785,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, break; } - mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); + mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); BUG_ON(ex.fe_len <= 0); if (free < ex.fe_len) { ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, @@ -1840,7 +1837,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { if (!mb_test_bit(i, bitmap)) { - max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); + max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); if (max >= sbi->s_stripe) { ac->ac_found++; ac->ac_b_ex = ex; -- cgit v1.2.3 From 0e376b1e3ccedee49cb8cc6b652fbc1e7c15eeef Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 10:04:17 -0400 Subject: ext4: return an error if kset_create_and_add fails in ext4_init_fs() In the very unlikely case that kset_create_and_add() fails when the ext4.ko module is being loaded (or during kernel startup) set err so that it's clear that the module load failed. https://bugzilla.kernel.org/show_bug.cgi?id=27912 Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0423e2e7f615..3ab798d00f02 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5285,8 +5285,10 @@ static int __init ext4_init_fs(void) if (err) goto out6; ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); - if (!ext4_kset) + if (!ext4_kset) { + err = -ENOMEM; goto out5; + } ext4_proc_root = proc_mkdir("fs/ext4", NULL); err = ext4_init_feat_adverts(); -- cgit v1.2.3 From 07724f98978ad39386a3996a4e1b6780489a4dda Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 19:08:42 -0400 Subject: ext4: drop lock_super()/unlock_super() We don't need lock_super()/unlock_super() any more, since the places where it is used, we are protected by the s_umount r/w semaphore. Signed-off-by: "Theodore Ts'o" Cc: Marco Stornelli --- fs/ext4/super.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3ab798d00f02..bae4124e1473 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -861,7 +861,6 @@ static void ext4_put_super(struct super_block *sb) flush_workqueue(sbi->dio_unwritten_wq); destroy_workqueue(sbi->dio_unwritten_wq); - lock_super(sb); if (sbi->s_journal) { err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -928,7 +927,6 @@ static void ext4_put_super(struct super_block *sb) * Now that we are completely done shutting down the * superblock, we need to actually destroy the kobject. */ - unlock_super(sb); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); if (sbi->s_chksum_driver) @@ -4535,11 +4533,9 @@ static int ext4_unfreeze(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - lock_super(sb); /* Reset the needs_recovery flag before the fs is unlocked. */ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); ext4_commit_super(sb, 1); - unlock_super(sb); return 0; } @@ -4575,7 +4571,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) char *orig_data = kstrdup(data, GFP_KERNEL); /* Store the original options */ - lock_super(sb); old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; old_opts.s_mount_opt2 = sbi->s_mount_opt2; @@ -4717,7 +4712,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal == NULL) ext4_commit_super(sb, 1); - unlock_super(sb); #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) @@ -4730,10 +4724,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { err = ext4_enable_quotas(sb); - if (err) { - lock_super(sb); + if (err) goto restore_opts; - } } } #endif @@ -4760,7 +4752,6 @@ restore_opts: sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } #endif - unlock_super(sb); kfree(orig_data); return err; } -- cgit v1.2.3 From caecd0af8fe0635e8e6900399b951433af35bf52 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Sat, 18 Aug 2012 22:29:18 -0400 Subject: ext4: replace plain integer with NULL in super.c Fixes the following sparse warning: fs/ext4/super.c:1672:45: warning: Using plain integer as NULL pointer Signed-off-by: Sachin Kamat Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bae4124e1473..b875ff555865 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1673,7 +1673,7 @@ static int parse_options(char *options, struct super_block *sb, * Initialize args struct so we know whether arg was * found; some options take optional arguments. */ - args[0].to = args[0].from = 0; + args[0].to = args[0].from = NULL; token = match_token(p, tokens, args); if (handle_mount_opt(sb, p, token, args, journal_devnum, journal_ioprio, is_remount) < 0) -- cgit v1.2.3 From eeecef0af5ea4efd763c9554cf2bd80fc4a0efd3 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 18 Aug 2012 22:29:40 -0400 Subject: jbd2: don't write superblock when if its empty This sequence: # truncate --size=1g fsfile # mkfs.ext4 -F fsfile # mount -o loop,ro fsfile /mnt # umount /mnt # dmesg | tail results in an IO error when unmounting the RO filesystem: [ 318.020828] Buffer I/O error on device loop1, logical block 196608 [ 318.027024] lost page write due to I/O error on loop1 [ 318.032088] JBD2: Error -5 detected when updating journal superblock for loop1-8. This was a regression introduced by commit 24bcc89c7e7c: "jbd2: split updating of journal superblock and marking journal empty". Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/jbd2/journal.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index bd23f2ebaa67..0f16edd51f67 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1354,6 +1354,11 @@ static void jbd2_mark_journal_empty(journal_t *journal) BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); read_lock(&journal->j_state_lock); + /* Is it already empty? */ + if (sb->s_start == 0) { + read_unlock(&journal->j_state_lock); + return; + } jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", journal->j_tail_sequence); -- cgit v1.2.3 From e3d2e433e32a4b7a05818cbc9158037e26c74c8e Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Sat, 18 Aug 2012 22:29:46 -0400 Subject: ext4: no need to add inode to orphan list during hole punch While performing punch hole for an inode, i_disksize is not changed. So, there is no need to add the inode to orphan list. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Acked-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2f082abf4992..2e56903e8d53 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4860,9 +4860,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) if (IS_ERR(handle)) return PTR_ERR(handle); - err = ext4_orphan_add(handle, inode); - if (err) - goto out; /* * Now we need to zero out the non-page-aligned data in the @@ -4948,7 +4945,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) up_write(&EXT4_I(inode)->i_data_sem); out: - ext4_orphan_del(handle, inode); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); -- cgit v1.2.3 From 30cb27d661f5fdc582add0a9c297946a18ee4a4b Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Sat, 18 Aug 2012 22:38:07 -0400 Subject: ext4: fix trivial typo in comment Signed-off-by: Wang Sheng-Hui Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2e56903e8d53..cc6d2b984e8f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3301,7 +3301,7 @@ out: * to an uninitialized extent. * * Writing to an uninitialized extent may result in splitting the uninitialized - * extent into multiple /initialized uninitialized extents (up to three) + * extent into multiple initialized/uninitialized extents (up to three) * There are three possibilities: * a> There is no split required: Entire extent should be uninitialized * b> Splits in two extents: Write is happening at either end of the extent -- cgit v1.2.3 From 8a2f8460e816f4786939a0cefbda35af6bd1a1c5 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 19 Aug 2012 18:07:40 -0400 Subject: ext4: remove duplicated declarations in inode.c In patch cb20d5188366f04d96d2e07b1240cc92170ade40, ext4_set_bh_endio and ext4_end_io_buffer_write are declared at the beginning of inode.c, and again later on in the middle of the file. Remove the second set of duplicated function declarations. Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6324f74e0342..b4effbda7a96 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1954,9 +1954,6 @@ out: return ret; } -static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); -static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); - /* * Note that we don't need to start a transaction unless we're journaling data * because we should have holes filled from ext4_page_mkwrite(). We even don't -- cgit v1.2.3 From b7ca69289680cf631fb20b7d436467c4ec1153cd Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 3 Aug 2012 08:43:01 -0500 Subject: CIFS: Protect i_nlink from being negative that can cause warning messages. Pavel had initially suggested a smaller patch around drop_nlink, after a similar problem was discovered NFS. Protecting additional places where nlink is touched was suggested by Jeff Layton and is included in this. Reviewed-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Steve French --- fs/cifs/inode.c | 24 ++++++++++++++++-------- fs/cifs/link.c | 2 ++ 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7354877fa3bd..cb79c7edecb0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -124,10 +124,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) { struct cifsInodeInfo *cifs_i = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - unsigned long oldtime = cifs_i->time; cifs_revalidate_cache(inode, fattr); + spin_lock(&inode->i_lock); inode->i_atime = fattr->cf_atime; inode->i_mtime = fattr->cf_mtime; inode->i_ctime = fattr->cf_ctime; @@ -148,9 +148,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) else cifs_i->time = jiffies; - cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode, - oldtime, cifs_i->time); - cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; cifs_i->server_eof = fattr->cf_eof; @@ -158,7 +155,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) * Can't safely change the file size here if the client is writing to * it due to potential races. */ - spin_lock(&inode->i_lock); if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) { i_size_write(inode, fattr->cf_eof); @@ -859,12 +855,14 @@ struct inode *cifs_root_iget(struct super_block *sb) if (rc && tcon->ipc) { cFYI(1, "ipc connection - fake read inode"); + spin_lock(&inode->i_lock); inode->i_mode |= S_IFDIR; set_nlink(inode, 2); inode->i_op = &cifs_ipc_inode_ops; inode->i_fop = &simple_dir_operations; inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; + spin_unlock(&inode->i_lock); } else if (rc) { iget_failed(inode); inode = ERR_PTR(rc); @@ -1110,6 +1108,15 @@ undo_setattr: goto out_close; } +/* copied from fs/nfs/dir.c with small changes */ +static void +cifs_drop_nlink(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&inode->i_lock); +} /* * If dentry->d_inode is null (usually meaning the cached dentry @@ -1166,13 +1173,13 @@ retry_std_delete: psx_del_no_retry: if (!rc) { if (inode) - drop_nlink(inode); + cifs_drop_nlink(inode); } else if (rc == -ENOENT) { d_drop(dentry); } else if (rc == -ETXTBSY) { rc = cifs_rename_pending_delete(full_path, dentry, xid); if (rc == 0) - drop_nlink(inode); + cifs_drop_nlink(inode); } else if ((rc == -EACCES) && (dosattr == 0) && inode) { attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (attrs == NULL) { @@ -1241,9 +1248,10 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, * setting nlink not necessary except in cases where we failed to get it * from the server or was set bogus */ + spin_lock(&dentry->d_inode->i_lock); if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) set_nlink(dentry->d_inode, 2); - + spin_unlock(&dentry->d_inode->i_lock); mode &= ~current_umask(); /* must turn on setgid bit if parent dir has it */ if (inode->i_mode & S_ISGID) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 09e4b3ae4564..e6ce3b112875 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -433,7 +433,9 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, if (old_file->d_inode) { cifsInode = CIFS_I(old_file->d_inode); if (rc == 0) { + spin_lock(&old_file->d_inode->i_lock); inc_nlink(old_file->d_inode); + spin_unlock(&old_file->d_inode->i_lock); /* BB should we make this contingent on superblock flag NOATIME? */ /* old_file->d_inode->i_ctime = CURRENT_TIME;*/ /* parent dir timestamps will update from srv -- cgit v1.2.3 From 7411286088d5ba879e9ffcaaa296f657642ef2c4 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 27 Jul 2012 01:20:41 +0400 Subject: CIFS: Fix log messages in packet checking for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 16 +++++++++------- fs/cifs/smb2pdu.h | 10 ++++++---- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index a4ff5d547554..e4d3b9964167 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -52,7 +52,8 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid) cERROR(1, "Bad protocol string signature header %x", *(unsigned int *) hdr->ProtocolId); if (mid != hdr->MessageId) - cERROR(1, "Mids do not match"); + cERROR(1, "Mids do not match: %llu and %llu", mid, + hdr->MessageId); } cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId); return 1; @@ -107,7 +108,7 @@ smb2_check_message(char *buf, unsigned int length) * ie Validate the wct via smb2_struct_sizes table above */ - if (length < 2 + sizeof(struct smb2_hdr)) { + if (length < sizeof(struct smb2_pdu)) { if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) { pdu->StructureSize2 = 0; /* @@ -121,15 +122,15 @@ smb2_check_message(char *buf, unsigned int length) return 1; } if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { - cERROR(1, "SMB length greater than maximum, mid=%lld", mid); + cERROR(1, "SMB length greater than maximum, mid=%llu", mid); return 1; } if (check_smb2_hdr(hdr, mid)) return 1; - if (hdr->StructureSize != SMB2_HEADER_SIZE) { - cERROR(1, "Illegal structure size %d", + if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { + cERROR(1, "Illegal structure size %u", le16_to_cpu(hdr->StructureSize)); return 1; } @@ -161,8 +162,9 @@ smb2_check_message(char *buf, unsigned int length) if (4 + len != clc_len) { cFYI(1, "Calculated size %u length %u mismatch mid %llu", clc_len, 4 + len, mid); - if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */ - return 0; /* BB workaround Samba 3 bug SessSetup rsp */ + /* server can return one byte more */ + if (clc_len == 4 + len + 1) + return 0; return 1; } return 0; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f37a1b41b402..c5fbfac5d576 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -87,10 +87,6 @@ #define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) -#define SMB2_HEADER_SIZE __constant_le16_to_cpu(64) - -#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) - /* * SMB2 Header Definition * @@ -99,6 +95,9 @@ * "PDU" : "Protocol Data Unit" (ie a network "frame") * */ + +#define SMB2_HEADER_STRUCTURE_SIZE __constant_le16_to_cpu(64) + struct smb2_hdr { __be32 smb2_buf_length; /* big endian on wire */ /* length is only two or three bytes - with @@ -140,6 +139,9 @@ struct smb2_pdu { * command code name for the struct. Note that structures must be packed. * */ + +#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) + struct smb2_err_rsp { struct smb2_hdr hdr; __le16 StructureSize; -- cgit v1.2.3 From 985e4ff016b5f3d95c12fe8073d1df89300dab3d Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 3 Aug 2012 09:42:45 -0500 Subject: cifs: print error code if smb signature verification fails While trying to debug a SMB signature related issue with Windows Servers figured out it might be easier to debug if we print the error code from cifs_verify_signature(). Also, fix indendation while at it. Signed-off-by: Suresh Jayaraman Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 11 ++++++++--- fs/cifs/transport.c | 9 ++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 074923ce593d..f0cf934ba877 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1576,9 +1576,14 @@ cifs_readv_callback(struct mid_q_entry *mid) /* result already set, check signature */ if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { - if (cifs_verify_signature(rdata->iov, rdata->nr_iov, - server, mid->sequence_number + 1)) - cERROR(1, "Unexpected SMB signature"); + int rc = 0; + + rc = cifs_verify_signature(rdata->iov, rdata->nr_iov, + server, + mid->sequence_number + 1); + if (rc) + cERROR(1, "SMB signature verification returned " + "error = %d", rc); } /* FIXME: should this be counted toward the initiating task? */ task_io_account_read(rdata->bytes); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 83867ef348df..d9b639b95fa8 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -503,13 +503,16 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, /* convert the length into a more usable form */ if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { struct kvec iov; + int rc = 0; iov.iov_base = mid->resp_buf; iov.iov_len = len; /* FIXME: add code to kill session */ - if (cifs_verify_signature(&iov, 1, server, - mid->sequence_number + 1) != 0) - cERROR(1, "Unexpected SMB signature"); + rc = cifs_verify_signature(&iov, 1, server, + mid->sequence_number + 1); + if (rc) + cERROR(1, "SMB signature verification returned error = " + "%d", rc); } /* BB special case reconnect tid and uid here? */ -- cgit v1.2.3 From ea7b4887e7266b93fa0c203cc452a926a0fef4f0 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 17 Aug 2012 18:02:19 +0400 Subject: CIFS: Fix cifs_do_create error hadnling Commit d2c127197dfc0b2bae62a52e1e0d3e3ff493919e caused a regression in cifs_do_create error handling. Fix this by closing a file handle in the case of a get_inode_info(_unix) error. Also remove unnecessary checks for newinode being NULL. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/dir.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index cbe709ad6663..781025be48bc 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -356,19 +356,12 @@ cifs_create_get_file_info: cifs_create_set_dentry: if (rc != 0) { cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); + CIFSSMBClose(xid, tcon, *fileHandle); goto out; } d_drop(direntry); d_add(direntry, newinode); - /* ENOENT for create? How weird... */ - rc = -ENOENT; - if (!newinode) { - CIFSSMBClose(xid, tcon, *fileHandle); - goto out; - } - rc = 0; - out: kfree(buf); kfree(full_path); -- cgit v1.2.3 From 7653f6ff4ebab2a094e65b60fb19ee66ed2f78e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Aug 2012 12:12:29 -0400 Subject: NFSv4: Ensure that nfs4_alloc_client cleans up on error. Any pointer that was allocated through nfs_alloc_client() needs to be freed via a call to nfs_free_client(). Reported-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cbcdfaf32505..24eb663f8ed5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) return clp; error: - kfree(clp); + nfs_free_client(clp); return ERR_PTR(err); } -- cgit v1.2.3 From 086600430493e04b802bee6e5b3ce0458e4eb77f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Aug 2012 12:42:15 -0400 Subject: NFSv3: Ensure that do_proc_get_root() reports errors correctly If the rpc call to NFS3PROC_FSINFO fails, then we need to report that error so that the mount fails. Otherwise we can end up with a superblock with completely unusable values for block sizes, maxfilesize, etc. Reported-by: Yuanming Chen Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0952c791df36..d6b3b5f2d779 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); -- cgit v1.2.3 From d1c338a509cea5378df59629ad47382810c38623 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 19 Aug 2012 12:29:16 -0700 Subject: libceph: delay debugfs initialization until we learn global_id The debugfs directory includes the cluster fsid and our unique global_id. We need to delay the initialization of the debug entry until we have learned both the fsid and our global_id from the monitor or else the second client can't create its debugfs entry and will fail (and multiple client instances aren't properly reflected in debugfs). Reported by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Yehuda Sadeh --- fs/ceph/debugfs.c | 1 + net/ceph/ceph_common.c | 1 - net/ceph/debugfs.c | 4 ++++ net/ceph/mon_client.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 51 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fb962efdacee..6d59006bfa27 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) int err = -ENOMEM; dout("ceph_fs_debugfs_init\n"); + BUG_ON(!fsc->client->debugfs_dir); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 69e38db28e5f..a8020293f342 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -84,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) return -1; } } else { - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); memcpy(&client->fsid, fsid, sizeof(*fsid)); } return 0; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 54b531a01121..38b5dc1823d4 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client) snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, client->monc.auth->global_id); + dout("ceph_debugfs_client_init %p %s\n", client, name); + + BUG_ON(client->debugfs_dir); client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); if (!client->debugfs_dir) goto out; @@ -234,6 +237,7 @@ out: void ceph_debugfs_client_cleanup(struct ceph_client *client) { + dout("ceph_debugfs_client_cleanup %p\n", client); debugfs_remove(client->debugfs_osdmap); debugfs_remove(client->debugfs_monmap); debugfs_remove(client->osdc.debugfs_file); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 105d533b55f3..900ea0f043fc 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -310,6 +310,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) } EXPORT_SYMBOL(ceph_monc_open_session); +/* + * We require the fsid and global_id in order to initialize our + * debugfs dir. + */ +static bool have_debugfs_info(struct ceph_mon_client *monc) +{ + dout("have_debugfs_info fsid %d globalid %lld\n", + (int)monc->client->have_fsid, monc->auth->global_id); + return monc->client->have_fsid && monc->auth->global_id > 0; +} + /* * The monitor responds with mount ack indicate mount success. The * included client ticket allows the client to talk to MDSs and OSDs. @@ -320,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, struct ceph_client *client = monc->client; struct ceph_monmap *monmap = NULL, *old = monc->monmap; void *p, *end; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); + dout("handle_monmap\n"); p = msg->front.iov_base; end = p + msg->front.iov_len; @@ -344,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, if (!client->have_fsid) { client->have_fsid = true; + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); - /* - * do debugfs initialization without mutex to avoid - * creating a locking dependency - */ - ceph_debugfs_client_init(client); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } + goto out_unlocked; } out: @@ -865,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc, { int ret; int was_auth = 0; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); if (monc->auth->ops) was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; @@ -889,7 +915,22 @@ static void handle_auth_reply(struct ceph_mon_client *monc, __send_subscribe(monc); __resend_generic_request(monc); } + + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } } static int __validate_auth(struct ceph_mon_client *monc) -- cgit v1.2.3 From 0e665d5d1125f9f4ccff56a75e814f10f88861a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Aug 2012 15:28:00 +0100 Subject: vfs: missed source of ->f_pos races compat_sys_{read,write}v() need the same "pass a copy of file->f_pos" thing as sys_{read,write}{,v}(). Signed-off-by: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/compat.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 6161255fac45..1bdb350ea5d3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1155,11 +1155,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_readv(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } @@ -1221,11 +1224,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_writev(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } -- cgit v1.2.3 From 43829731dd372d04d6706c51052b9dabab9ca356 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Aug 2012 14:51:24 -0700 Subject: workqueue: deprecate flush[_delayed]_work_sync() flush[_delayed]_work_sync() are now spurious. Mark them deprecated and convert all users to flush[_delayed]_work(). If you're cc'd and wondering what's going on: Now all workqueues are non-reentrant and the regular flushes guarantee that the work item is not pending or running on any CPU on return, so there's no reason to use the sync flushes at all and they're going away. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Cc: Russell King Cc: Paul Mundt Cc: Ian Campbell Cc: Jens Axboe Cc: Mattia Dongili Cc: Kent Yoder Cc: David Airlie Cc: Jiri Kosina Cc: Karsten Keil Cc: Bryan Wu Cc: Benjamin Herrenschmidt Cc: Alasdair Kergon Cc: Mauro Carvalho Chehab Cc: Florian Tobias Schandinat Cc: David Woodhouse Cc: "David S. Miller" Cc: linux-wireless@vger.kernel.org Cc: Anton Vorontsov Cc: Sangbeom Kim Cc: "James E.J. Bottomley" Cc: Greg Kroah-Hartman Cc: Eric Van Hensbergen Cc: Takashi Iwai Cc: Steven Whitehouse Cc: Petr Vandrovec Cc: Mark Fasheh Cc: Christoph Hellwig Cc: Avi Kivity --- arch/arm/mach-pxa/sharpsl_pm.c | 4 ++-- arch/arm/plat-omap/mailbox.c | 2 +- arch/sh/drivers/push-switch.c | 2 +- drivers/block/xen-blkfront.c | 4 ++-- drivers/cdrom/gdrom.c | 2 +- drivers/char/sonypi.c | 2 +- drivers/char/tpm/tpm.c | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 2 +- drivers/gpu/drm/nouveau/nouveau_gpio.c | 2 +- drivers/gpu/drm/radeon/radeon_irq_kms.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 2 +- drivers/hid/hid-picolcd.c | 2 +- drivers/input/touchscreen/wm831x-ts.c | 2 +- drivers/isdn/mISDN/hwchannel.c | 4 ++-- drivers/leds/leds-lm3533.c | 6 +++--- drivers/leds/leds-lp8788.c | 2 +- drivers/leds/leds-wm8350.c | 2 +- drivers/macintosh/ams/ams-core.c | 2 +- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-stripe.c | 2 +- drivers/media/dvb/dvb-core/dvb_net.c | 4 ++-- drivers/media/dvb/mantis/mantis_evm.c | 2 +- drivers/media/dvb/mantis/mantis_uart.c | 2 +- drivers/media/video/bt8xx/bttv-driver.c | 2 +- drivers/media/video/cx18/cx18-driver.c | 2 +- drivers/media/video/cx231xx/cx231xx-cards.c | 2 +- drivers/media/video/cx23885/cx23885-input.c | 6 +++--- drivers/media/video/cx88/cx88-mpeg.c | 2 +- drivers/media/video/em28xx/em28xx-cards.c | 2 +- drivers/media/video/omap24xxcam.c | 6 +++--- drivers/media/video/saa7134/saa7134-core.c | 2 +- drivers/media/video/saa7134/saa7134-empress.c | 2 +- drivers/media/video/tm6000/tm6000-cards.c | 2 +- drivers/mfd/menelaus.c | 4 ++-- drivers/misc/ioc4.c | 2 +- drivers/mtd/mtdoops.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 +- drivers/net/ethernet/neterion/vxge/vxge-main.c | 2 +- drivers/net/ethernet/sun/cassini.c | 2 +- drivers/net/ethernet/sun/niu.c | 2 +- drivers/net/wireless/hostap/hostap_ap.c | 4 ++-- drivers/net/wireless/hostap/hostap_hw.c | 10 +++++----- drivers/power/collie_battery.c | 2 +- drivers/power/tosa_battery.c | 2 +- drivers/power/wm97xx_battery.c | 2 +- drivers/power/z2_battery.c | 2 +- drivers/regulator/core.c | 2 +- drivers/scsi/arcmsr/arcmsr_hba.c | 4 ++-- drivers/scsi/ipr.c | 2 +- drivers/scsi/pmcraid.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 2 +- drivers/tty/hvc/hvsi.c | 2 +- drivers/tty/ipwireless/hardware.c | 2 +- drivers/tty/ipwireless/network.c | 4 ++-- drivers/tty/serial/kgdboc.c | 2 +- drivers/tty/serial/omap-serial.c | 2 +- drivers/tty/tty_ldisc.c | 6 +++--- drivers/usb/atm/speedtch.c | 2 +- drivers/usb/atm/ueagle-atm.c | 2 +- drivers/usb/gadget/u_ether.c | 2 +- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/otg/isp1301_omap.c | 2 +- fs/affs/super.c | 2 +- fs/gfs2/lock_dlm.c | 2 +- fs/gfs2/super.c | 2 +- fs/hfs/inode.c | 2 +- fs/ncpfs/inode.c | 6 +++--- fs/ocfs2/cluster/quorum.c | 2 +- fs/xfs/xfs_super.c | 2 +- fs/xfs/xfs_sync.c | 2 +- include/linux/workqueue.h | 4 ++-- net/9p/trans_fd.c | 2 +- net/dsa/dsa.c | 2 +- sound/i2c/other/ak4113.c | 2 +- sound/i2c/other/ak4114.c | 2 +- sound/pci/oxygen/oxygen_lib.c | 8 ++++---- sound/soc/codecs/wm8350.c | 2 +- sound/soc/codecs/wm8753.c | 2 +- sound/soc/soc-core.c | 6 +++--- virt/kvm/eventfd.c | 2 +- 81 files changed, 111 insertions(+), 111 deletions(-) (limited to 'fs') diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c index bdf4cb88ca0a..7875ad6456bd 100644 --- a/arch/arm/mach-pxa/sharpsl_pm.c +++ b/arch/arm/mach-pxa/sharpsl_pm.c @@ -579,8 +579,8 @@ static int sharpsl_ac_check(void) static int sharpsl_pm_suspend(struct platform_device *pdev, pm_message_t state) { sharpsl_pm.flags |= SHARPSL_SUSPENDED; - flush_delayed_work_sync(&toggle_charger); - flush_delayed_work_sync(&sharpsl_bat); + flush_delayed_work(&toggle_charger); + flush_delayed_work(&sharpsl_bat); if (sharpsl_pm.charge_mode == CHRG_ON) sharpsl_pm.flags |= SHARPSL_DO_OFFLINE_CHRG; diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c index 5e13c3884aa4..42377ef9ea3d 100644 --- a/arch/arm/plat-omap/mailbox.c +++ b/arch/arm/plat-omap/mailbox.c @@ -310,7 +310,7 @@ static void omap_mbox_fini(struct omap_mbox *mbox) omap_mbox_disable_irq(mbox, IRQ_RX); free_irq(mbox->irq, mbox); tasklet_kill(&mbox->txq->tasklet); - flush_work_sync(&mbox->rxq->work); + flush_work(&mbox->rxq->work); mbox_queue_free(mbox->txq); mbox_queue_free(mbox->rxq); } diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 637b79b09657..5bfb341cc5c4 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -107,7 +107,7 @@ static int switch_drv_remove(struct platform_device *pdev) device_remove_file(&pdev->dev, &dev_attr_switch); platform_set_drvdata(pdev, NULL); - flush_work_sync(&psw->work); + flush_work(&psw->work); del_timer_sync(&psw->debounce); free_irq(irq, pdev); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c2d2e5c1597..007db8986e84 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -670,7 +670,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) spin_unlock_irqrestore(&info->io_lock, flags); /* Flush gnttab callback work. Must be done with no locks held. */ - flush_work_sync(&info->work); + flush_work(&info->work); del_gendisk(info->gd); @@ -719,7 +719,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) spin_unlock_irq(&info->io_lock); /* Flush gnttab callback work. Must be done with no locks held. */ - flush_work_sync(&info->work); + flush_work(&info->work); /* Free resources associated with old device channel. */ if (info->ring_ref != GRANT_INVALID_REF) { diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 3ceaf006e7f0..75d485afe56c 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -840,7 +840,7 @@ probe_fail_no_mem: static int __devexit remove_gdrom(struct platform_device *devptr) { - flush_work_sync(&work); + flush_work(&work); blk_cleanup_queue(gd.gdrom_rq); free_irq(HW_EVENT_GDROM_CMD, &gd); free_irq(HW_EVENT_GDROM_DMA, &gd); diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index f87780502b41..320debbe32fa 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -1433,7 +1433,7 @@ static int __devexit sonypi_remove(struct platform_device *dev) sonypi_disable(); synchronize_irq(sonypi_device.irq); - flush_work_sync(&sonypi_device.input_work); + flush_work(&sonypi_device.input_work); if (useinput) { input_unregister_device(sonypi_device.input_key_dev); diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 817f0ee202b6..3af9f4d1a23f 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -1172,7 +1172,7 @@ int tpm_release(struct inode *inode, struct file *file) struct tpm_chip *chip = file->private_data; del_singleshot_timer_sync(&chip->user_read_timer); - flush_work_sync(&chip->work); + flush_work(&chip->work); file->private_data = NULL; atomic_set(&chip->data_pending, 0); kfree(chip->data_buffer); @@ -1225,7 +1225,7 @@ ssize_t tpm_read(struct file *file, char __user *buf, int rc; del_singleshot_timer_sync(&chip->user_read_timer); - flush_work_sync(&chip->work); + flush_work(&chip->work); ret_size = atomic_read(&chip->data_pending); atomic_set(&chip->data_pending, 0); if (ret_size > 0) { /* relay data */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index d2d88f22a037..a40808e51338 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -908,7 +908,7 @@ static int g2d_suspend(struct device *dev) /* FIXME: good range? */ usleep_range(500, 1000); - flush_work_sync(&g2d->runqueue_work); + flush_work(&g2d->runqueue_work); return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_gpio.c b/drivers/gpu/drm/nouveau/nouveau_gpio.c index 82c19e82ff02..0fe4e17c461d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gpio.c +++ b/drivers/gpu/drm/nouveau/nouveau_gpio.c @@ -302,7 +302,7 @@ nouveau_gpio_isr_del(struct drm_device *dev, int idx, u8 tag, u8 line, spin_unlock_irqrestore(&pgpio->lock, flags); list_for_each_entry_safe(isr, tmp, &tofree, head) { - flush_work_sync(&isr->work); + flush_work(&isr->work); kfree(isr); } } diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index afaa1727abd2..50b596ec7b7e 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -277,7 +277,7 @@ void radeon_irq_kms_fini(struct radeon_device *rdev) if (rdev->msi_enabled) pci_disable_msi(rdev->pdev); } - flush_work_sync(&rdev->hotplug_work); + flush_work(&rdev->hotplug_work); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 3c447bf317cb..a32f2e96dd02 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -594,7 +594,7 @@ int vmw_fb_off(struct vmw_private *vmw_priv) par->dirty.active = false; spin_unlock_irqrestore(&par->dirty.lock, flags); - flush_delayed_work_sync(&info->deferred_work); + flush_delayed_work(&info->deferred_work); par->bo_ptr = NULL; ttm_bo_kunmap(&par->map); diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c index 27c8ebdfad01..3e90dee30ff6 100644 --- a/drivers/hid/hid-picolcd.c +++ b/drivers/hid/hid-picolcd.c @@ -2737,7 +2737,7 @@ static void __exit picolcd_exit(void) { hid_unregister_driver(&picolcd_driver); #ifdef CONFIG_HID_PICOLCD_FB - flush_work_sync(&picolcd_fb_cleanup); + flush_work(&picolcd_fb_cleanup); WARN_ON(fb_pending); #endif } diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c index e83410721e38..52abb98a8ae5 100644 --- a/drivers/input/touchscreen/wm831x-ts.c +++ b/drivers/input/touchscreen/wm831x-ts.c @@ -221,7 +221,7 @@ static void wm831x_ts_input_close(struct input_dev *idev) synchronize_irq(wm831x_ts->pd_irq); /* Make sure the IRQ completion work is quiesced */ - flush_work_sync(&wm831x_ts->pd_data_work); + flush_work(&wm831x_ts->pd_data_work); /* If we ended up with the pen down then make sure we revert back * to pen detection state for the next time we start up. diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c index ef34fd40867c..4b85f7279f18 100644 --- a/drivers/isdn/mISDN/hwchannel.c +++ b/drivers/isdn/mISDN/hwchannel.c @@ -116,7 +116,7 @@ mISDN_freedchannel(struct dchannel *ch) } skb_queue_purge(&ch->squeue); skb_queue_purge(&ch->rqueue); - flush_work_sync(&ch->workq); + flush_work(&ch->workq); return 0; } EXPORT_SYMBOL(mISDN_freedchannel); @@ -157,7 +157,7 @@ mISDN_freebchannel(struct bchannel *ch) mISDN_clear_bchannel(ch); skb_queue_purge(&ch->rqueue); ch->rcount = 0; - flush_work_sync(&ch->workq); + flush_work(&ch->workq); return 0; } EXPORT_SYMBOL(mISDN_freebchannel); diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c index f56b6e7ffdac..f6837b99908c 100644 --- a/drivers/leds/leds-lm3533.c +++ b/drivers/leds/leds-lm3533.c @@ -737,7 +737,7 @@ err_sysfs_remove: sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group); err_unregister: led_classdev_unregister(&led->cdev); - flush_work_sync(&led->work); + flush_work(&led->work); return ret; } @@ -751,7 +751,7 @@ static int __devexit lm3533_led_remove(struct platform_device *pdev) lm3533_ctrlbank_disable(&led->cb); sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group); led_classdev_unregister(&led->cdev); - flush_work_sync(&led->work); + flush_work(&led->work); return 0; } @@ -765,7 +765,7 @@ static void lm3533_led_shutdown(struct platform_device *pdev) lm3533_ctrlbank_disable(&led->cb); lm3533_led_set(&led->cdev, LED_OFF); /* disable blink */ - flush_work_sync(&led->work); + flush_work(&led->work); } static struct platform_driver lm3533_led_driver = { diff --git a/drivers/leds/leds-lp8788.c b/drivers/leds/leds-lp8788.c index 53bd136f1ef0..cb76bc46a021 100644 --- a/drivers/leds/leds-lp8788.c +++ b/drivers/leds/leds-lp8788.c @@ -172,7 +172,7 @@ static int __devexit lp8788_led_remove(struct platform_device *pdev) struct lp8788_led *led = platform_get_drvdata(pdev); led_classdev_unregister(&led->led_dev); - flush_work_sync(&led->work); + flush_work(&led->work); return 0; } diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c index 918d4baff1c7..4c62113f7a77 100644 --- a/drivers/leds/leds-wm8350.c +++ b/drivers/leds/leds-wm8350.c @@ -275,7 +275,7 @@ static int wm8350_led_remove(struct platform_device *pdev) struct wm8350_led *led = platform_get_drvdata(pdev); led_classdev_unregister(&led->cdev); - flush_work_sync(&led->work); + flush_work(&led->work); wm8350_led_disable(led); regulator_put(led->dcdc); regulator_put(led->isink); diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c index 5c6a2d876562..36a4fdddd64a 100644 --- a/drivers/macintosh/ams/ams-core.c +++ b/drivers/macintosh/ams/ams-core.c @@ -226,7 +226,7 @@ void ams_sensor_detach(void) * We do this after ams_info.exit(), because an interrupt might * have arrived before disabling them. */ - flush_work_sync(&ams_info.worker); + flush_work(&ams_info.worker); /* Remove device */ of_device_unregister(ams_info.of_dev); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d8abb90a6c2f..bb18eafcc85a 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -944,7 +944,7 @@ static void flush_multipath_work(struct multipath *m) flush_workqueue(kmpath_handlerd); multipath_wait_for_pg_init_completion(m); flush_workqueue(kmultipathd); - flush_work_sync(&m->trigger_event); + flush_work(&m->trigger_event); } static void multipath_dtr(struct dm_target *ti) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index bc5ddba8045b..fd61f98ee1f6 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1146,7 +1146,7 @@ static void mirror_dtr(struct dm_target *ti) del_timer_sync(&ms->timer); flush_workqueue(ms->kmirrord_wq); - flush_work_sync(&ms->trigger_event); + flush_work(&ms->trigger_event); dm_kcopyd_client_destroy(ms->kcopyd_client); destroy_workqueue(ms->kmirrord_wq); free_context(ms, ti, ms->nr_mirrors); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index a087bf2a8d66..e2f876539743 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -199,7 +199,7 @@ static void stripe_dtr(struct dm_target *ti) for (i = 0; i < sc->stripes; i++) dm_put_device(ti, sc->stripe[i].dev); - flush_work_sync(&sc->trigger_event); + flush_work(&sc->trigger_event); kfree(sc); } diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c index 8766ce8c354d..c2117688aa23 100644 --- a/drivers/media/dvb/dvb-core/dvb_net.c +++ b/drivers/media/dvb/dvb-core/dvb_net.c @@ -1329,8 +1329,8 @@ static int dvb_net_remove_if(struct dvb_net *dvbnet, unsigned long num) return -EBUSY; dvb_net_stop(net); - flush_work_sync(&priv->set_multicast_list_wq); - flush_work_sync(&priv->restart_net_feed_wq); + flush_work(&priv->set_multicast_list_wq); + flush_work(&priv->restart_net_feed_wq); printk("dvb_net: removed network interface %s\n", net->name); unregister_netdev(net); dvbnet->state[num]=0; diff --git a/drivers/media/dvb/mantis/mantis_evm.c b/drivers/media/dvb/mantis/mantis_evm.c index 71ce52875c38..909ff54868a3 100644 --- a/drivers/media/dvb/mantis/mantis_evm.c +++ b/drivers/media/dvb/mantis/mantis_evm.c @@ -111,7 +111,7 @@ void mantis_evmgr_exit(struct mantis_ca *ca) struct mantis_pci *mantis = ca->ca_priv; dprintk(MANTIS_DEBUG, 1, "Mantis Host I/F Event manager exiting"); - flush_work_sync(&ca->hif_evm_work); + flush_work(&ca->hif_evm_work); mantis_hif_exit(ca); mantis_pcmcia_exit(ca); } diff --git a/drivers/media/dvb/mantis/mantis_uart.c b/drivers/media/dvb/mantis/mantis_uart.c index 18340dafa426..85e977861b4a 100644 --- a/drivers/media/dvb/mantis/mantis_uart.c +++ b/drivers/media/dvb/mantis/mantis_uart.c @@ -183,6 +183,6 @@ void mantis_uart_exit(struct mantis_pci *mantis) { /* disable interrupt */ mmwrite(mmread(MANTIS_UART_CTL) & 0xffef, MANTIS_UART_CTL); - flush_work_sync(&mantis->uart_work); + flush_work(&mantis->uart_work); } EXPORT_SYMBOL_GPL(mantis_uart_exit); diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index b58ff87db771..2ce7179a3864 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -196,7 +196,7 @@ static void request_modules(struct bttv *dev) static void flush_request_modules(struct bttv *dev) { - flush_work_sync(&dev->request_module_wk); + flush_work(&dev->request_module_wk); } #else #define request_modules(dev) diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c index 7e5ffd6f5178..75c890907920 100644 --- a/drivers/media/video/cx18/cx18-driver.c +++ b/drivers/media/video/cx18/cx18-driver.c @@ -272,7 +272,7 @@ static void request_modules(struct cx18 *dev) static void flush_request_modules(struct cx18 *dev) { - flush_work_sync(&dev->request_module_wk); + flush_work(&dev->request_module_wk); } #else #define request_modules(dev) diff --git a/drivers/media/video/cx231xx/cx231xx-cards.c b/drivers/media/video/cx231xx/cx231xx-cards.c index 02d4d36735d3..b84ebc54d91b 100644 --- a/drivers/media/video/cx231xx/cx231xx-cards.c +++ b/drivers/media/video/cx231xx/cx231xx-cards.c @@ -1002,7 +1002,7 @@ static void request_modules(struct cx231xx *dev) static void flush_request_modules(struct cx231xx *dev) { - flush_work_sync(&dev->request_module_wk); + flush_work(&dev->request_module_wk); } #else #define request_modules(dev) diff --git a/drivers/media/video/cx23885/cx23885-input.c b/drivers/media/video/cx23885/cx23885-input.c index ce765e3f77bd..bcbf7faf1bab 100644 --- a/drivers/media/video/cx23885/cx23885-input.c +++ b/drivers/media/video/cx23885/cx23885-input.c @@ -231,9 +231,9 @@ static void cx23885_input_ir_stop(struct cx23885_dev *dev) v4l2_subdev_call(dev->sd_ir, ir, rx_s_parameters, ¶ms); v4l2_subdev_call(dev->sd_ir, ir, rx_g_parameters, ¶ms); } - flush_work_sync(&dev->cx25840_work); - flush_work_sync(&dev->ir_rx_work); - flush_work_sync(&dev->ir_tx_work); + flush_work(&dev->cx25840_work); + flush_work(&dev->ir_rx_work); + flush_work(&dev->ir_tx_work); } static void cx23885_input_ir_close(struct rc_dev *rc) diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c index cd5386ee210c..c04fb618e10b 100644 --- a/drivers/media/video/cx88/cx88-mpeg.c +++ b/drivers/media/video/cx88/cx88-mpeg.c @@ -70,7 +70,7 @@ static void request_modules(struct cx8802_dev *dev) static void flush_request_modules(struct cx8802_dev *dev) { - flush_work_sync(&dev->request_module_wk); + flush_work(&dev->request_module_wk); } #else #define request_modules(dev) diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index ca62b9981380..f7831e73f077 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -2900,7 +2900,7 @@ static void request_modules(struct em28xx *dev) static void flush_request_modules(struct em28xx *dev) { - flush_work_sync(&dev->request_module_wk); + flush_work(&dev->request_module_wk); } #else #define request_modules(dev) diff --git a/drivers/media/video/omap24xxcam.c b/drivers/media/video/omap24xxcam.c index e5015b0d5508..8d7283bbd431 100644 --- a/drivers/media/video/omap24xxcam.c +++ b/drivers/media/video/omap24xxcam.c @@ -1198,7 +1198,7 @@ static int vidioc_streamoff(struct file *file, void *fh, enum v4l2_buf_type i) atomic_inc(&cam->reset_disable); - flush_work_sync(&cam->sensor_reset_work); + flush_work(&cam->sensor_reset_work); rval = videobuf_streamoff(q); if (!rval) { @@ -1512,7 +1512,7 @@ static int omap24xxcam_release(struct file *file) atomic_inc(&cam->reset_disable); - flush_work_sync(&cam->sensor_reset_work); + flush_work(&cam->sensor_reset_work); /* stop streaming capture */ videobuf_streamoff(&fh->vbq); @@ -1536,7 +1536,7 @@ static int omap24xxcam_release(struct file *file) * not be scheduled anymore since streaming is already * disabled.) */ - flush_work_sync(&cam->sensor_reset_work); + flush_work(&cam->sensor_reset_work); mutex_lock(&cam->mutex); if (atomic_dec_return(&cam->users) == 0) { diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c index 5fbb4e49495c..f2b37e05b964 100644 --- a/drivers/media/video/saa7134/saa7134-core.c +++ b/drivers/media/video/saa7134/saa7134-core.c @@ -170,7 +170,7 @@ static void request_submodules(struct saa7134_dev *dev) static void flush_request_submodules(struct saa7134_dev *dev) { - flush_work_sync(&dev->request_module_wk); + flush_work(&dev->request_module_wk); } #else diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c index dde361a9194e..4df79c656909 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -556,7 +556,7 @@ static int empress_fini(struct saa7134_dev *dev) if (NULL == dev->empress_dev) return 0; - flush_work_sync(&dev->empress_workqueue); + flush_work(&dev->empress_workqueue); video_unregister_device(dev->empress_dev); dev->empress_dev = NULL; return 0; diff --git a/drivers/media/video/tm6000/tm6000-cards.c b/drivers/media/video/tm6000/tm6000-cards.c index 034659b13174..307d8c5fb7cd 100644 --- a/drivers/media/video/tm6000/tm6000-cards.c +++ b/drivers/media/video/tm6000/tm6000-cards.c @@ -1074,7 +1074,7 @@ static void request_modules(struct tm6000_core *dev) static void flush_request_modules(struct tm6000_core *dev) { - flush_work_sync(&dev->request_module_wk); + flush_work(&dev->request_module_wk); } #else #define request_modules(dev) diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c index cb4910ac4d12..55d589981412 100644 --- a/drivers/mfd/menelaus.c +++ b/drivers/mfd/menelaus.c @@ -1259,7 +1259,7 @@ static int menelaus_probe(struct i2c_client *client, return 0; fail2: free_irq(client->irq, menelaus); - flush_work_sync(&menelaus->work); + flush_work(&menelaus->work); fail1: kfree(menelaus); return err; @@ -1270,7 +1270,7 @@ static int __exit menelaus_remove(struct i2c_client *client) struct menelaus_chip *menelaus = i2c_get_clientdata(client); free_irq(client->irq, menelaus); - flush_work_sync(&menelaus->work); + flush_work(&menelaus->work); kfree(menelaus); the_menelaus = NULL; return 0; diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c index df03dd3bd0e2..6a7710603a90 100644 --- a/drivers/misc/ioc4.c +++ b/drivers/misc/ioc4.c @@ -487,7 +487,7 @@ static void __exit ioc4_exit(void) { /* Ensure ioc4_load_modules() has completed before exiting */ - flush_work_sync(&ioc4_load_modules_work); + flush_work(&ioc4_load_modules_work); pci_unregister_driver(&ioc4_driver); } diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 551e316e4454..438737a1f59a 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -387,8 +387,8 @@ static void mtdoops_notify_remove(struct mtd_info *mtd) printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n"); cxt->mtd = NULL; - flush_work_sync(&cxt->work_erase); - flush_work_sync(&cxt->work_write); + flush_work(&cxt->work_erase); + flush_work(&cxt->work_write); } diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 6505070abcfa..089ed134369b 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -1394,7 +1394,7 @@ static int offload_close(struct t3cdev *tdev) sysfs_remove_group(&tdev->lldev->dev.kobj, &offload_attr_group); /* Flush work scheduled while releasing TIDs */ - flush_work_sync(&td->tid_release_task); + flush_work(&td->tid_release_task); tdev->lldev = NULL; cxgb3_set_dummy_ops(tdev); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index de2190443510..30ce10bc9149 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3521,7 +3521,7 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) strncpy(buf, dev->name, IFNAMSIZ); - flush_work_sync(&vdev->reset_task); + flush_work(&vdev->reset_task); /* in 2.6 will call stop() if device is up */ unregister_netdev(dev); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index ce4df61b4b56..c8251be104d6 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -3890,7 +3890,7 @@ static int cas_change_mtu(struct net_device *dev, int new_mtu) schedule_work(&cp->reset_task); #endif - flush_work_sync(&cp->reset_task); + flush_work(&cp->reset_task); return 0; } diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index c2a0fe393267..710f35395195 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9932,7 +9932,7 @@ static int niu_suspend(struct pci_dev *pdev, pm_message_t state) if (!netif_running(dev)) return 0; - flush_work_sync(&np->reset_task); + flush_work(&np->reset_task); niu_netif_stop(np); del_timer_sync(&np->timer); diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index e1f410277242..c6ea995750db 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -860,10 +860,10 @@ void hostap_free_data(struct ap_data *ap) return; } - flush_work_sync(&ap->add_sta_proc_queue); + flush_work(&ap->add_sta_proc_queue); #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT - flush_work_sync(&ap->wds_oper_queue); + flush_work(&ap->wds_oper_queue); if (ap->crypt) ap->crypt->deinit(ap->crypt_priv); ap->crypt = ap->crypt_priv = NULL; diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index 50f87b60b0bd..8e7000fd4414 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -3311,13 +3311,13 @@ static void prism2_free_local_data(struct net_device *dev) unregister_netdev(local->dev); - flush_work_sync(&local->reset_queue); - flush_work_sync(&local->set_multicast_list_queue); - flush_work_sync(&local->set_tim_queue); + flush_work(&local->reset_queue); + flush_work(&local->set_multicast_list_queue); + flush_work(&local->set_tim_queue); #ifndef PRISM2_NO_STATION_MODES - flush_work_sync(&local->info_queue); + flush_work(&local->info_queue); #endif - flush_work_sync(&local->comms_qual_update); + flush_work(&local->comms_qual_update); lib80211_crypt_info_free(&local->crypt_info); diff --git a/drivers/power/collie_battery.c b/drivers/power/collie_battery.c index 74c6b23aeabf..b19bfe400f8c 100644 --- a/drivers/power/collie_battery.c +++ b/drivers/power/collie_battery.c @@ -290,7 +290,7 @@ static struct gpio collie_batt_gpios[] = { static int collie_bat_suspend(struct ucb1x00_dev *dev, pm_message_t state) { /* flush all pending status updates */ - flush_work_sync(&bat_work); + flush_work(&bat_work); return 0; } diff --git a/drivers/power/tosa_battery.c b/drivers/power/tosa_battery.c index 28bbe7e094e3..51199b5ce221 100644 --- a/drivers/power/tosa_battery.c +++ b/drivers/power/tosa_battery.c @@ -327,7 +327,7 @@ static struct gpio tosa_bat_gpios[] = { static int tosa_bat_suspend(struct platform_device *dev, pm_message_t state) { /* flush all pending status updates */ - flush_work_sync(&bat_work); + flush_work(&bat_work); return 0; } diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c index d2d4c08c681c..1245fe1f48c3 100644 --- a/drivers/power/wm97xx_battery.c +++ b/drivers/power/wm97xx_battery.c @@ -146,7 +146,7 @@ static irqreturn_t wm97xx_chrg_irq(int irq, void *data) #ifdef CONFIG_PM static int wm97xx_bat_suspend(struct device *dev) { - flush_work_sync(&bat_work); + flush_work(&bat_work); return 0; } diff --git a/drivers/power/z2_battery.c b/drivers/power/z2_battery.c index 8c9a607ea77a..5757d0d6782f 100644 --- a/drivers/power/z2_battery.c +++ b/drivers/power/z2_battery.c @@ -276,7 +276,7 @@ static int z2_batt_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct z2_charger *charger = i2c_get_clientdata(client); - flush_work_sync(&charger->bat_work); + flush_work(&charger->bat_work); return 0; } diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f092588a078c..1d2b70017a8d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3335,7 +3335,7 @@ void regulator_unregister(struct regulator_dev *rdev) regulator_put(rdev->supply); mutex_lock(®ulator_list_mutex); debugfs_remove_recursive(rdev->debugfs); - flush_work_sync(&rdev->disable_work.work); + flush_work(&rdev->disable_work.work); WARN_ON(rdev->open_count); unset_regulator_supplies(rdev); list_del(&rdev->list); diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index def24a1079ad..33c52bc2c7b4 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -999,7 +999,7 @@ static void arcmsr_remove(struct pci_dev *pdev) int poll_count = 0; arcmsr_free_sysfs_attr(acb); scsi_remove_host(host); - flush_work_sync(&acb->arcmsr_do_message_isr_bh); + flush_work(&acb->arcmsr_do_message_isr_bh); del_timer_sync(&acb->eternal_timer); arcmsr_disable_outbound_ints(acb); arcmsr_stop_adapter_bgrb(acb); @@ -1045,7 +1045,7 @@ static void arcmsr_shutdown(struct pci_dev *pdev) (struct AdapterControlBlock *)host->hostdata; del_timer_sync(&acb->eternal_timer); arcmsr_disable_outbound_ints(acb); - flush_work_sync(&acb->arcmsr_do_message_isr_bh); + flush_work(&acb->arcmsr_do_message_isr_bh); arcmsr_stop_adapter_bgrb(acb); arcmsr_flush_adapter_cache(acb); } diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 467dc38246f9..2ffeb9afd1b7 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9020,7 +9020,7 @@ static void __ipr_remove(struct pci_dev *pdev) spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); - flush_work_sync(&ioa_cfg->work_q); + flush_work(&ioa_cfg->work_q); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); spin_lock(&ipr_driver_lock); diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index ea8a0b47d66d..af763eab2039 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -5459,7 +5459,7 @@ static void __devexit pmcraid_remove(struct pci_dev *pdev) pmcraid_shutdown(pdev); pmcraid_disable_interrupts(pinstance, ~0); - flush_work_sync(&pinstance->worker_q); + flush_work(&pinstance->worker_q); pmcraid_kill_tasklets(pinstance); pmcraid_unregister_interrupt_handler(pinstance); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 5b30132960c7..bddc97c5c8e9 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -969,7 +969,7 @@ void qlt_stop_phase1(struct qla_tgt *tgt) spin_unlock_irqrestore(&ha->hardware_lock, flags); mutex_unlock(&ha->tgt.tgt_mutex); - flush_delayed_work_sync(&tgt->sess_del_work); + flush_delayed_work(&tgt->sess_del_work); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf009, "Waiting for sess works (tgt %p)", tgt); diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c index 6f5bc49c441f..1f8e8b37ed23 100644 --- a/drivers/tty/hvc/hvsi.c +++ b/drivers/tty/hvc/hvsi.c @@ -765,7 +765,7 @@ static void hvsi_flush_output(struct hvsi_struct *hp) /* 'writer' could still be pending if it didn't see n_outbuf = 0 yet */ cancel_delayed_work_sync(&hp->writer); - flush_work_sync(&hp->handshaker); + flush_work(&hp->handshaker); /* * it's also possible that our timeout expired and hvsi_write_worker diff --git a/drivers/tty/ipwireless/hardware.c b/drivers/tty/ipwireless/hardware.c index 0aeb5a38d296..b4ba0670dc54 100644 --- a/drivers/tty/ipwireless/hardware.c +++ b/drivers/tty/ipwireless/hardware.c @@ -1729,7 +1729,7 @@ void ipwireless_hardware_free(struct ipw_hardware *hw) ipwireless_stop_interrupts(hw); - flush_work_sync(&hw->work_rx); + flush_work(&hw->work_rx); for (i = 0; i < NL_NUM_OF_ADDRESSES; i++) if (hw->packet_assembler[i] != NULL) diff --git a/drivers/tty/ipwireless/network.c b/drivers/tty/ipwireless/network.c index 57c8b481113f..90ea902ff191 100644 --- a/drivers/tty/ipwireless/network.c +++ b/drivers/tty/ipwireless/network.c @@ -430,8 +430,8 @@ void ipwireless_network_free(struct ipw_network *network) network->shutting_down = 1; ipwireless_ppp_close(network); - flush_work_sync(&network->work_go_online); - flush_work_sync(&network->work_go_offline); + flush_work(&network->work_go_online); + flush_work(&network->work_go_offline); ipwireless_stop_interrupts(network->hardware); ipwireless_associate_network(network->hardware, NULL); diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 2b42a01a81c6..ebabf929f460 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -122,7 +122,7 @@ static void kgdboc_unregister_kbd(void) i--; } } - flush_work_sync(&kgdboc_restore_input_work); + flush_work(&kgdboc_restore_input_work); } #else /* ! CONFIG_KDB_KEYBOARD */ #define kgdboc_register_kbd(x) 0 diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index d3cda0cb2df0..0952d71bdf28 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1205,7 +1205,7 @@ static int serial_omap_suspend(struct device *dev) if (up) { uart_suspend_port(&serial_omap_reg, &up->port); - flush_work_sync(&up->qos_work); + flush_work(&up->qos_work); } return 0; diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 6f99c9959f0c..ac5be812dbe3 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -523,9 +523,9 @@ static int tty_ldisc_halt(struct tty_struct *tty) */ static void tty_ldisc_flush_works(struct tty_struct *tty) { - flush_work_sync(&tty->hangup_work); - flush_work_sync(&tty->SAK_work); - flush_work_sync(&tty->buf.work); + flush_work(&tty->hangup_work); + flush_work(&tty->SAK_work); + flush_work(&tty->buf.work); } /** diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c index 975e9c6691d6..807627b36cc8 100644 --- a/drivers/usb/atm/speedtch.c +++ b/drivers/usb/atm/speedtch.c @@ -718,7 +718,7 @@ static void speedtch_atm_stop(struct usbatm_data *usbatm, struct atm_dev *atm_de del_timer_sync(&instance->resubmit_timer); usb_free_urb(int_urb); - flush_work_sync(&instance->status_check_work); + flush_work(&instance->status_check_work); } static int speedtch_pre_reset(struct usb_interface *intf) diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index d7e422dc0ef7..9d1a044504e8 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -2234,7 +2234,7 @@ static void uea_stop(struct uea_softc *sc) usb_free_urb(sc->urb_int); /* flush the work item, when no one can schedule it */ - flush_work_sync(&sc->task); + flush_work(&sc->task); release_firmware(sc->dsp_firm); uea_leaves(INS_TO_USBDEV(sc)); diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 90e82e288eb9..f1e5fbf5c2c7 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -834,7 +834,7 @@ void gether_cleanup(void) return; unregister_netdev(the_dev->net); - flush_work_sync(&the_dev->work); + flush_work(&the_dev->work); free_netdev(the_dev->net); the_dev = NULL; diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 2b1e8d84c873..2364098ea83c 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -893,7 +893,7 @@ static void ohci_stop (struct usb_hcd *hcd) ohci_dump (ohci, 1); if (quirk_nec(ohci)) - flush_work_sync(&ohci->nec_work); + flush_work(&ohci->nec_work); ohci_usb_reset (ohci); ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable); diff --git a/drivers/usb/otg/isp1301_omap.c b/drivers/usb/otg/isp1301_omap.c index 7a88667742b6..720df35561c9 100644 --- a/drivers/usb/otg/isp1301_omap.c +++ b/drivers/usb/otg/isp1301_omap.c @@ -1230,7 +1230,7 @@ static int __exit isp1301_remove(struct i2c_client *i2c) isp->timer.data = 0; set_bit(WORK_STOP, &isp->todo); del_timer_sync(&isp->timer); - flush_work_sync(&isp->work); + flush_work(&isp->work); put_device(&i2c->dev); the_transceiver = NULL; diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..022cecb0757d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -551,7 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) return -EINVAL; } - flush_delayed_work_sync(&sbi->sb_work); + flush_delayed_work(&sbi->sb_work); replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 4a38db739ca0..0fb6539b0c8c 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1289,7 +1289,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) spin_lock(&ls->ls_recover_spin); set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); spin_unlock(&ls->ls_recover_spin); - flush_delayed_work_sync(&sdp->sd_control_work); + flush_delayed_work(&sdp->sd_control_work); /* mounted_lock and control_lock will be purged in dlm recovery */ release: diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fc3168f47a14..867700aba536 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1572,7 +1572,7 @@ out: clear_inode(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; - flush_delayed_work_sync(&ip->i_gl->gl_work); + flush_delayed_work(&ip->i_gl->gl_work); gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index ee1bc55677f1..553909395270 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -644,7 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, /* sync the superblock to buffers */ sb = inode->i_sb; - flush_delayed_work_sync(&HFS_SB(sb)->mdb_work); + flush_delayed_work(&HFS_SB(sb)->mdb_work); /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); if (!ret) diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 333df07ae3bd..eaa74323663a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -314,11 +314,11 @@ static void ncp_stop_tasks(struct ncp_server *server) { release_sock(sk); del_timer_sync(&server->timeout_tm); - flush_work_sync(&server->rcv.tq); + flush_work(&server->rcv.tq); if (sk->sk_socket->type == SOCK_STREAM) - flush_work_sync(&server->tx.tq); + flush_work(&server->tx.tq); else - flush_work_sync(&server->timeout_tq); + flush_work(&server->timeout_tq); } static int ncp_show_options(struct seq_file *seq, struct dentry *root) diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 8f9cea1597af..c19897d0fe14 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -327,5 +327,5 @@ void o2quo_exit(void) { struct o2quo_state *qs = &o2quo_state; - flush_work_sync(&qs->qs_work); + flush_work(&qs->qs_work); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bdaf4cb9f4a2..e8e6be439bcd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -953,7 +953,7 @@ xfs_fs_sync_fs( * We schedule xfssyncd now (now that the disk is * active) instead of later (when it might not be). */ - flush_delayed_work_sync(&mp->m_sync_work); + flush_delayed_work(&mp->m_sync_work); } return 0; diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 96548176db80..9500caf15acf 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -475,7 +475,7 @@ xfs_flush_inodes( struct xfs_mount *mp = ip->i_mount; queue_work(xfs_syncd_wq, &mp->m_flush_work); - flush_work_sync(&mp->m_flush_work); + flush_work(&mp->m_flush_work); } STATIC void diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 855fcdaa2d72..a351be7c3e91 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -460,13 +460,13 @@ static inline bool __cancel_delayed_work(struct delayed_work *work) } /* used to be different but now identical to flush_work(), deprecated */ -static inline bool flush_work_sync(struct work_struct *work) +static inline bool __deprecated flush_work_sync(struct work_struct *work) { return flush_work(work); } /* used to be different but now identical to flush_delayed_work(), deprecated */ -static inline bool flush_delayed_work_sync(struct delayed_work *dwork) +static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork) { return flush_delayed_work(dwork); } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6449bae15702..505f0ce3f10b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1083,7 +1083,7 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - flush_work_sync(&p9_poll_work); + flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 88e7c2f3fa0d..45295ca09571 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -370,7 +370,7 @@ static int dsa_remove(struct platform_device *pdev) if (dst->link_poll_needed) del_timer_sync(&dst->link_poll_timer); - flush_work_sync(&dst->link_poll_work); + flush_work(&dst->link_poll_work); for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; diff --git a/sound/i2c/other/ak4113.c b/sound/i2c/other/ak4113.c index dde5c9c92132..ef68d710d08c 100644 --- a/sound/i2c/other/ak4113.c +++ b/sound/i2c/other/ak4113.c @@ -141,7 +141,7 @@ void snd_ak4113_reinit(struct ak4113 *chip) { chip->init = 1; mb(); - flush_delayed_work_sync(&chip->work); + flush_delayed_work(&chip->work); ak4113_init_regs(chip); /* bring up statistics / event queing */ chip->init = 0; diff --git a/sound/i2c/other/ak4114.c b/sound/i2c/other/ak4114.c index fdf3c1b65e38..816e7d225fb0 100644 --- a/sound/i2c/other/ak4114.c +++ b/sound/i2c/other/ak4114.c @@ -154,7 +154,7 @@ void snd_ak4114_reinit(struct ak4114 *chip) { chip->init = 1; mb(); - flush_delayed_work_sync(&chip->work); + flush_delayed_work(&chip->work); ak4114_init_regs(chip); /* bring up statistics / event queing */ chip->init = 0; diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index ab8738e21ad1..e9fa2d07951d 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -573,8 +573,8 @@ static void oxygen_card_free(struct snd_card *card) oxygen_shutdown(chip); if (chip->irq >= 0) free_irq(chip->irq, chip); - flush_work_sync(&chip->spdif_input_bits_work); - flush_work_sync(&chip->gpio_work); + flush_work(&chip->spdif_input_bits_work); + flush_work(&chip->gpio_work); chip->model.cleanup(chip); kfree(chip->model_data); mutex_destroy(&chip->mutex); @@ -751,8 +751,8 @@ static int oxygen_pci_suspend(struct device *dev) spin_unlock_irq(&chip->reg_lock); synchronize_irq(chip->irq); - flush_work_sync(&chip->spdif_input_bits_work); - flush_work_sync(&chip->gpio_work); + flush_work(&chip->spdif_input_bits_work); + flush_work(&chip->gpio_work); chip->interrupt_mask = saved_interrupt_mask; pci_disable_device(pci); diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index d26c8ae4e6d9..a4cae060bf26 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1601,7 +1601,7 @@ static int wm8350_codec_remove(struct snd_soc_codec *codec) /* if there was any work waiting then we run it now and * wait for its completion */ - flush_delayed_work_sync(&codec->dapm.delayed_work); + flush_delayed_work(&codec->dapm.delayed_work); wm8350_set_bias_level(codec, SND_SOC_BIAS_OFF); diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index 13bff87ddcf5..2e4a775ae560 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -1509,7 +1509,7 @@ static int wm8753_probe(struct snd_soc_codec *codec) /* power down chip */ static int wm8753_remove(struct snd_soc_codec *codec) { - flush_delayed_work_sync(&codec->dapm.delayed_work); + flush_delayed_work(&codec->dapm.delayed_work); wm8753_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index f219b2f7ee68..f6eec782c61e 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -591,7 +591,7 @@ int snd_soc_suspend(struct device *dev) /* close any waiting streams and save state */ for (i = 0; i < card->num_rtd; i++) { - flush_delayed_work_sync(&card->rtd[i].delayed_work); + flush_delayed_work(&card->rtd[i].delayed_work); card->rtd[i].codec->dapm.suspend_bias_level = card->rtd[i].codec->dapm.bias_level; } @@ -1846,7 +1846,7 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card) /* make sure any delayed work runs */ for (i = 0; i < card->num_rtd; i++) { struct snd_soc_pcm_runtime *rtd = &card->rtd[i]; - flush_delayed_work_sync(&rtd->delayed_work); + flush_delayed_work(&rtd->delayed_work); } /* remove auxiliary devices */ @@ -1890,7 +1890,7 @@ int snd_soc_poweroff(struct device *dev) * now, we're shutting down so no imminent restart. */ for (i = 0; i < card->num_rtd; i++) { struct snd_soc_pcm_runtime *rtd = &card->rtd[i]; - flush_delayed_work_sync(&rtd->delayed_work); + flush_delayed_work(&rtd->delayed_work); } snd_soc_dapm_shutdown(card); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7d7e2aaffece..67a35e90384c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -90,7 +90,7 @@ irqfd_shutdown(struct work_struct *work) * We know no new events will be scheduled at this point, so block * until all previously outstanding events have completed */ - flush_work_sync(&irqfd->inject); + flush_work(&irqfd->inject); /* * It is now safe to release the object's resources -- cgit v1.2.3 From 73e8712aa02d924844fbd5bd84a2445a1c3f68d7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 20 Aug 2012 14:10:00 +0300 Subject: UBIFS: remove stale commentary Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c3fa6c5327a3..71a197f0f93d 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1157,9 +1157,6 @@ static int check_free_space(struct ubifs_info *c) * * This function mounts UBIFS file system. Returns zero in case of success and * a negative error code in case of failure. - * - * Note, the function does not de-allocate resources it it fails half way - * through, and the caller has to do this instead. */ static int mount_ubifs(struct ubifs_info *c) { -- cgit v1.2.3 From 11e3be0be2a1314e0861304857e7efcaed5d3e54 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 20 Aug 2012 15:16:24 +0300 Subject: UBIFS: fix crash on error path This patch fixes a regression introduced by "4994297 UBIFS: make ubifs_lpt_init clean-up in case of failure" which I've hit while running the 'integck -p' test. When remount the file-system from R/O mode to R/W mode and 'lpt_init_wr()' fails, we free _all_ LPT resources by calling 'ubifs_lpt_free(c, 0)', even those needed for R/O mode. This leads to subsequent crashes, e.g., if we try to unmount the file-system. Cc: stable@vger.kernel.org [v3.5+] Signed-off-by: Artem Bityutskiy --- fs/ubifs/lpt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ce33b2beb151..8640920766ed 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) return 0; out_err: - ubifs_lpt_free(c, 0); + if (wr) + ubifs_lpt_free(c, 1); + if (rd) + ubifs_lpt_free(c, 0); return err; } -- cgit v1.2.3 From c212f4020de7b5d35a71327d1483120a698d60a0 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 21 Aug 2012 13:45:35 +0300 Subject: UBIFS: fix replay regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit "d51f17e UBIFS: simplify reply code a bit" introduces a bug with the following symptoms: UBIFS error (pid 1): replay_log_leb: first CS node at LEB 3:0 has wrong commit number 0 expected 1 The issue is that we start replaying the log from UBIFS_LOG_LNUM instead of c->lhead_lnum. This patch fixes that. Reported-by: Uwe Kleine-König Signed-off-by: Artem Bityutskiy --- fs/ubifs/replay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index eba46d4a7619..94d78fc5d4e0 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c) c->replaying = 1; lnum = c->ltail_lnum = c->lhead_lnum; - lnum = UBIFS_LOG_LNUM; do { err = replay_log_leb(c, lnum, 0, c->sbuf); if (err == 1) @@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; lnum = ubifs_next_log_lnum(c, lnum); - } while (lnum != UBIFS_LOG_LNUM); + } while (lnum != c->ltail_lnum); err = replay_buds(c); if (err) -- cgit v1.2.3 From 6c5e50fa614fea5325a2973be06f7ec6f1055316 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Aug 2012 15:55:25 -0700 Subject: ceph: tolerate (and warn on) extraneous dentry from mds If the MDS gives us a dentry and we weren't prepared to handle it, WARN_ON_ONCE instead of crashing. Reported-by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/inode.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9fff9f3b17e4..4b5762ef7c2b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, if (rinfo->head->is_dentry) { struct inode *dir = req->r_locked_dir; - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1, - &req->r_caps_reservation); - if (err < 0) - return err; + if (dir) { + err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + session, req->r_request_started, -1, + &req->r_caps_reservation); + if (err < 0) + return err; + } else { + WARN_ON_ONCE(1); + } } /* @@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, * will have trouble splicing in the virtual snapdir later */ if (rinfo->head->is_dentry && !req->r_aborted && + req->r_locked_dir && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { -- cgit v1.2.3 From 45f2e081f573526977abfa781a12728f83e9641f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Aug 2012 12:11:51 -0700 Subject: ceph: avoid divide by zero in __validate_layout() If "l->stripe_unit" is zero the the mod on the next line will cause a divide by zero bug. This comes from the copy_from_user() in ceph_ioctl_set_layout_policy(). Passing 0 is valid, though (it means "do not change") so avoid the % check in that case. Reported-by: Dan Carpenter Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8e3fb69fbe62..1396ceb46797 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc, /* validate striping parameters */ if ((l->object_size & ~PAGE_MASK) || (l->stripe_unit & ~PAGE_MASK) || - ((unsigned)l->object_size % (unsigned)l->stripe_unit)) + (l->stripe_unit != 0 && + ((unsigned)l->object_size % (unsigned)l->stripe_unit))) return -EINVAL; /* make sure it's a valid data pool */ -- cgit v1.2.3 From 65b455b123c7e2b835a0b7148f9bae584f95000e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 21 Aug 2012 21:50:58 +0300 Subject: UBIFS: fix complaints about too small debug buffer size When debugging is enabled, we use a temporary on-stack buffer for formatting the key strings like "(11368871, direntry, 0xcd0750)". The buffer size is 32 bytes and sometimes it is not enough to fit the key string - e.g., when inode numbers are high. This is not fatal, but the key strings are incomplete and UBIFS complains like this: UBIFS assert failed in dbg_snprintf_key at 137 (pid 1) This is a regression caused by "515315a UBIFS: fix key printing". Fix the issue by increasing the buffer to 48 bytes. Reported-by: Michael Hench Signed-off-by: Artem Bityutskiy Tested-by: Michael Hench Cc: stable@vger.kernel.org [v3.3+] --- fs/ubifs/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8b8cc4e945f4..760de723dadb 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -167,7 +167,7 @@ struct ubifs_global_debug_info { #define ubifs_dbg_msg(type, fmt, ...) \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) -#define DBG_KEY_BUF_LEN 32 +#define DBG_KEY_BUF_LEN 48 #define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ -- cgit v1.2.3 From 98022748f6c7bce85b9f123fd4d1a621219dd8d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 22:42:36 -0400 Subject: eventpoll: use-after-possible-free in epoll_create1() As soon as we'd installed the file into descriptor table, it can get closed by another thread. Freeing ep in process... Signed-off-by: Al Viro --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c8b55670804..eedec84c1809 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) error = PTR_ERR(file); goto out_free_fd; } - fd_install(fd, file); ep->file = file; + fd_install(fd, file); return fd; out_free_fd: -- cgit v1.2.3 From 55852635a8e2803cbc22d0e143d727813f0fcdb5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Aug 2012 17:39:25 -0700 Subject: fs: fix fs/namei.c kernel-doc warnings Fix kernel-doc warnings in fs/namei.c: Warning(fs/namei.c:360): No description found for parameter 'inode' Warning(fs/namei.c:672): No description found for parameter 'nd' Signed-off-by: Randy Dunlap Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index db76b866a097..dd1ed1b8e98e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask) /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on + * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. @@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1; /** * may_follow_link - Check symlink following for unsafe situations * @link: The path of the symlink + * @nd: nameidata pathwalk data * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is -- cgit v1.2.3 From 69f9025894c391fec2f7c7ea9150203418454915 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 22 Aug 2012 16:47:28 +0300 Subject: UBIFS: fix error messages spelling Corruptio -> corruption. Signed-off-by: Artem Bityutskiy --- fs/ubifs/recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index c30d976b4be8..edeec499c048 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -788,7 +788,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ - ubifs_err("corruptio %d", ret); + ubifs_err("corruption %d", ret); ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); -- cgit v1.2.3 From c45640d87b8272e85d19147dad914a6b82e51370 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 22 Aug 2012 16:37:13 +0300 Subject: UBIFS: print PID in debug messages Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 760de723dadb..46b8dd123585 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -165,12 +165,14 @@ struct ubifs_global_debug_info { } while (0) #define ubifs_dbg_msg(type, fmt, ...) \ - pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) + pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid, \ + ##__VA_ARGS__) #define DBG_KEY_BUF_LEN 48 #define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ - pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ + pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid, \ + ##__VA_ARGS__, \ dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ } while (0) -- cgit v1.2.3 From db0f89690639ac1d5f89c0dd9713c2e83dd37281 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 22 Aug 2012 16:55:08 +0300 Subject: UBIFS: always print full error reports Even when we are emulating power cuts, otherwise it is difficult to investigate failures during emulated power cuts testing. Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 6 ------ fs/ubifs/scan.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index bb3167257aab..1aaa9f519485 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -305,9 +305,6 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_ch *ch = node; char key_buf[DBG_KEY_BUF_LEN]; - if (dbg_is_tst_rcvry(c)) - return; - /* If the magic is incorrect, just hexdump the first bytes */ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); @@ -882,9 +879,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) struct ubifs_scan_node *snod; void *buf; - if (dbg_is_tst_rcvry(c)) - return; - printk(KERN_ERR "(pid %d) start dumping LEB %d\n", current->pid, lnum); diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 7c40e6025fd6..07a49258ccfe 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -240,8 +240,6 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, int len; ubifs_err("corruption at LEB %d:%d", lnum, offs); - if (dbg_is_tst_rcvry(c)) - return; len = c->leb_size - offs; if (len > 8192) len = 8192; -- cgit v1.2.3 From 676ce6d5ca3098339c028d44fe0427d1566a4d2d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 23 Aug 2012 12:17:36 +0200 Subject: block: replace __getblk_slow misfix by grow_dev_page fix Commit 91f68c89d8f3 ("block: fix infinite loop in __getblk_slow") is not good: a successful call to grow_buffers() cannot guarantee that the page won't be reclaimed before the immediate next call to __find_get_block(), which is why there was always a loop there. Yesterday I got "EXT4-fs error (device loop0): __ext4_get_inode_loc:3595: inode #19278: block 664: comm cc1: unable to read itable block" on console, which pointed to this commit. I've been trying to bisect for weeks, why kbuild-on-ext4-on-loop-on-tmpfs sometimes fails from a missing header file, under memory pressure on ppc G5. I've never seen this on x86, and I've never seen it on 3.5-rc7 itself, despite that commit being in there: bisection pointed to an irrelevant pinctrl merge, but hard to tell when failure takes between 18 minutes and 38 hours (but so far it's happened quicker on 3.6-rc2). (I've since found such __ext4_get_inode_loc errors in /var/log/messages from previous weeks: why the message never appeared on console until yesterday morning is a mystery for another day.) Revert 91f68c89d8f3, restoring __getblk_slow() to how it was (plus a checkpatch nitfix). Simplify the interface between grow_buffers() and grow_dev_page(), and avoid the infinite loop beyond end of device by instead checking init_page_buffers()'s end_block there (I presume that's more efficient than a repeated call to blkdev_max_block()), returning -ENXIO to __getblk_slow() in that case. And remove akpm's ten-year-old "__getblk() cannot fail ... weird" comment, but that is worrying: are all users of __getblk() really now prepared for a NULL bh beyond end of device, or will some oops?? Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org # 3.0 3.2 3.4 3.5 Signed-off-by: Jens Axboe --- fs/buffer.c | 66 ++++++++++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 9f6d2e41281d..58e2e7b77372 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) /* * Initialise the state of a blockdev page's buffers. */ -static void +static sector_t init_page_buffers(struct page *page, struct block_device *bdev, sector_t block, int size) { @@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, block++; bh = bh->b_this_page; } while (bh != head); + + /* + * Caller needs to validate requested block against end of device. + */ + return end_block; } /* * Create the page-cache page that contains the requested block. * - * This is user purely for blockdev mappings. + * This is used purely for blockdev mappings. */ -static struct page * +static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size) + pgoff_t index, int size, int sizebits) { struct inode *inode = bdev->bd_inode; struct page *page; struct buffer_head *bh; + sector_t end_block; + int ret = 0; /* Will call free_more_memory() */ page = find_or_create_page(inode->i_mapping, index, (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) - return NULL; + return ret; BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); if (bh->b_size == size) { - init_page_buffers(page, bdev, block, size); - return page; + end_block = init_page_buffers(page, bdev, + index << sizebits, size); + goto done; } if (!try_to_free_buffers(page)) goto failed; @@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - init_page_buffers(page, bdev, block, size); + end_block = init_page_buffers(page, bdev, index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); - return page; - +done: + ret = (block < end_block) ? 1 : -ENXIO; failed: unlock_page(page); page_cache_release(page); - return NULL; + return ret; } /* @@ -999,7 +1007,6 @@ failed: static int grow_buffers(struct block_device *bdev, sector_t block, int size) { - struct page *page; pgoff_t index; int sizebits; @@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) bdevname(bdev, b)); return -EIO; } - block = index << sizebits; + /* Create a page with the proper size buffers.. */ - page = grow_dev_page(bdev, block, index, size); - if (!page) - return 0; - unlock_page(page); - page_cache_release(page); - return 1; + return grow_dev_page(bdev, block, index, size, sizebits); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { - int ret; - struct buffer_head *bh; - /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } -retry: - bh = __find_get_block(bdev, block, size); - if (bh) - return bh; + for (;;) { + struct buffer_head *bh; + int ret; - ret = grow_buffers(bdev, block, size); - if (ret == 0) { - free_more_memory(); - goto retry; - } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; + + ret = grow_buffers(bdev, block, size); + if (ret < 0) + return NULL; + if (ret == 0) + free_more_memory(); } - return NULL; } /* @@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block); * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() cannot fail - it just keeps trying. If you pass it an - * illegal block number, __getblk() will happily return a buffer_head - * which represents the non-existent block. Very weird. - * * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * attempt is failing. FIXME, perhaps? */ -- cgit v1.2.3 From 4b3e58a6d6c62b5457ff876fdfc79f66fff04cd2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 23 Aug 2012 17:28:58 +0300 Subject: UBIFS: improve scanning debug output Include LEB number and offset in scanning debugging output. Signed-off-by: Artem Bityutskiy --- fs/ubifs/scan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 07a49258ccfe..019ca47a1f8e 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -75,7 +75,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, magic = le32_to_cpu(ch->magic); if (magic == 0xFFFFFFFF) { - dbg_scan("hit empty space"); + dbg_scan("hit empty space at LEB %d:%d", lnum, offs); return SCANNED_EMPTY_SPACE; } @@ -85,7 +85,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (len < UBIFS_CH_SZ) return SCANNED_GARBAGE; - dbg_scan("scanning %s", dbg_ntype(ch->node_type)); + dbg_scan("scanning %s at LEB %d:%d", dbg_ntype(ch->node_type), lnum, offs); if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) return SCANNED_A_CORRUPT_NODE; @@ -114,8 +114,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, return SCANNED_A_BAD_PAD_NODE; } - dbg_scan("%d bytes padded, offset now %d", - pad_len, ALIGN(offs + node_len + pad_len, 8)); + dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len, + lnum, offs, ALIGN(offs + node_len + pad_len, 8)); return node_len + pad_len; } -- cgit v1.2.3 From 0b9e3f6d84ce619f697bb622d9165cccaa93d67c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 31 Jul 2012 14:55:51 +1000 Subject: xfs: fix uninitialised variable in xfs_rtbuf_get() Results in this assert failure in generic/090: XFS: Assertion failed: *nmap >= 1, file: fs/xfs/xfs_bmap.c, line: 4363 ..... Call Trace: [] xfs_bmapi_read+0x6b/0x370 [] xfs_rtbuf_get+0x42/0x130 [] xfs_rtget_summary+0x89/0x120 [] xfs_rtallocate_extent_size+0xce/0x340 [] xfs_rtallocate_extent+0x240/0x290 [] xfs_bmap_rtalloc+0x1ba/0x340 [] xfs_bmap_alloc+0x35/0x40 [] xfs_bmapi_allocate+0xf1/0x350 [] xfs_bmapi_write+0x66e/0xa60 [] xfs_iomap_write_direct+0x22a/0x3f0 [] __xfs_get_blocks+0x38b/0x5d0 [] xfs_get_blocks_direct+0x14/0x20 [] do_blockdev_direct_IO+0xf71/0x1eb0 [] __blockdev_direct_IO+0x55/0x60 [] xfs_vm_direct_IO+0x11a/0x1e0 [] generic_file_direct_write+0xd7/0x1b0 [] xfs_file_dio_aio_write+0x13c/0x320 [] xfs_file_aio_write+0x1c2/0x1d0 [] do_sync_write+0xa7/0xe0 [] vfs_write+0xa8/0x160 [] sys_pwrite64+0x92/0xb0 [] system_call_fastpath+0x16/0x1b Signed-off-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_rtalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 92d4331cd4f1..ca28a4ba4b54 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -857,7 +857,7 @@ xfs_rtbuf_get( xfs_buf_t *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; - int nmap; + int nmap = 1; int error; /* error value */ ip = issum ? mp->m_rsumip : mp->m_rbmip; -- cgit v1.2.3 From 761290309939743ddf97e2bd94c6da18c6436b79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Aug 2012 02:02:02 -0400 Subject: xfs: unlock the AGI buffer when looping in xfs_dialloc Also update some commens in the area to make the code easier to read. Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 21e37b55f7e5..5aceb3f8ecd6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -962,23 +962,22 @@ xfs_dialloc( if (!pag->pagi_freecount && !okalloc) goto nextag; + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. + */ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) goto out_error; - /* - * Once the AGI has been read in we have to recheck - * pagi_freecount with the AGI buffer lock held. - */ if (pag->pagi_freecount) { xfs_perag_put(pag); goto out_alloc; } - if (!okalloc) { - xfs_trans_brelse(tp, agbp); - goto nextag; - } + if (!okalloc) + goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); if (error) { @@ -1007,6 +1006,8 @@ xfs_dialloc( return 0; } +nextag_relse_buffer: + xfs_trans_brelse(tp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) -- cgit v1.2.3 From a672e1be30d5bc848cd0067c55ed29b2015b7c17 Mon Sep 17 00:00:00 2001 From: Tomas Racek Date: Tue, 14 Aug 2012 10:35:04 +0200 Subject: xfs: check for possible overflow in xfs_ioc_trim If range.start or range.minlen is bigger than filesystem size, return invalid value error. This fixes possible overflow in BTOBB macro when passed value was nearly ULLONG_MAX. Signed-off-by: Tomas Racek Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_discard.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index f9c3fe304a17..69cf4fcde03e 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -179,12 +179,14 @@ xfs_ioc_trim( * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. */ + if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || + range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) + return -XFS_ERROR(EINVAL); + start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; minlen = BTOBB(max_t(u64, granularity, range.minlen)); - if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) - return -XFS_ERROR(EINVAL); if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; -- cgit v1.2.3 From e599b3253c5e49f7a2a579eef2fc2aa066989ef5 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Fri, 10 Aug 2012 15:01:51 -0300 Subject: xfs: fix race while discarding buffers [V4] While xfs_buftarg_shrink() is freeing buffers from the dispose list (filled with buffers from lru list), there is a possibility to have xfs_buf_stale() racing with it, and removing buffers from dispose list before xfs_buftarg_shrink() does it. This happens because xfs_buftarg_shrink() handle the dispose list without locking and the test condition in xfs_buf_stale() checks for the buffer being in *any* list: if (!list_empty(&bp->b_lru)) If the buffer happens to be on dispose list, this causes the buffer counter of lru list (btp->bt_lru_nr) to be decremented twice (once in xfs_buftarg_shrink() and another in xfs_buf_stale()) causing a wrong account usage of the lru list. This may cause xfs_buftarg_shrink() to return a wrong value to the memory shrinker shrink_slab(), and such account error may also cause an underflowed value to be returned; since the counter is lower than the current number of items in the lru list, a decrement may happen when the counter is 0, causing an underflow on the counter. The fix uses a new flag field (and a new buffer flag) to serialize buffer handling during the shrink process. The new flag field has been designed to use btp->bt_lru_lock/unlock instead of xfs_buf_lock/unlock mechanism. dchinner, sandeen, aquini and aris also deserve credits for this. Signed-off-by: Carlos Maiolino Reviewed-by: Ben Myers Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 5 ++++- fs/xfs/xfs_buf.h | 41 ++++++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d7a9dd735e1e..933b7930b863 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -96,6 +96,7 @@ xfs_buf_lru_add( atomic_inc(&bp->b_hold); list_add_tail(&bp->b_lru, &btp->bt_lru); btp->bt_lru_nr++; + bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); } @@ -154,7 +155,8 @@ xfs_buf_stale( struct xfs_buftarg *btp = bp->b_target; spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { + if (!list_empty(&bp->b_lru) && + !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { list_del_init(&bp->b_lru); btp->bt_lru_nr--; atomic_dec(&bp->b_hold); @@ -1501,6 +1503,7 @@ xfs_buftarg_shrink( */ list_move(&bp->b_lru, &dispose); btp->bt_lru_nr--; + bp->b_lru_flags |= _XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d03b73b9604e..7c0b6a0a1557 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -38,27 +38,28 @@ typedef enum { XBRW_ZERO = 3, /* Zero target memory */ } xfs_buf_rw_t; -#define XBF_READ (1 << 0) /* buffer intended for reading from device */ -#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ -#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ -#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ /* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ +#define XBF_FUA (1 << 11)/* force cache write through mode */ +#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ /* flags used only as arguments to access routines */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ /* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ -#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1 << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ typedef unsigned int xfs_buf_flags_t; @@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t; { XBF_SYNCIO, "SYNCIO" }, \ { XBF_FUA, "FUA" }, \ { XBF_FLUSH, "FLUSH" }, \ - { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ + { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" } + { _XBF_COMPOUND, "COMPOUND" }, \ + { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } typedef struct xfs_buftarg { dev_t bt_dev; @@ -124,7 +126,12 @@ typedef struct xfs_buf { xfs_buf_flags_t b_flags; /* status flags */ struct semaphore b_sema; /* semaphore for lockables */ + /* + * concurrent access to b_lru and b_lru_flags are protected by + * bt_lru_lock and not by b_sema + */ struct list_head b_lru; /* lru list */ + xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ -- cgit v1.2.3 From 834ab12228fad777a11007a24cb6286b02c9a41c Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Tue, 21 Aug 2012 17:11:45 +0800 Subject: xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole() The type is already indicated by the function naming explicitly, so this argument can be omitted from those calls. Signed-off-by: Jie Liu Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_file.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 56afcdb2377d..92ba18f841f1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -962,8 +962,7 @@ xfs_vm_page_mkwrite( STATIC loff_t xfs_seek_data( struct file *file, - loff_t start, - u32 type) + loff_t start) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); @@ -1029,8 +1028,7 @@ out_unlock: STATIC loff_t xfs_seek_hole( struct file *file, - loff_t start, - u32 type) + loff_t start) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); @@ -1092,9 +1090,9 @@ xfs_file_llseek( case SEEK_SET: return generic_file_llseek(file, offset, origin); case SEEK_DATA: - return xfs_seek_data(file, offset, origin); + return xfs_seek_data(file, offset); case SEEK_HOLE: - return xfs_seek_hole(file, offset, origin); + return xfs_seek_hole(file, offset); default: return -EINVAL; } -- cgit v1.2.3 From d126d43f631f996daeee5006714fed914be32368 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Tue, 21 Aug 2012 17:11:57 +0800 Subject: xfs: Introduce a helper routine to probe data or hole offset from page cache Introduce helpers to probe data or hole offset from page cache. Signed-off-by: Jie Liu Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_file.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 92ba18f841f1..d78a746b6c7c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -36,6 +36,7 @@ #include #include +#include static const struct vm_operations_struct xfs_file_vm_ops; @@ -959,6 +960,224 @@ xfs_vm_page_mkwrite( return block_page_mkwrite(vma, vmf, xfs_get_blocks); } +/* + * This type is designed to indicate the type of offset we would like + * to search from page cache for either xfs_seek_data() or xfs_seek_hole(). + */ +enum { + HOLE_OFF = 0, + DATA_OFF, +}; + +/* + * Lookup the desired type of offset from the given page. + * + * On success, return true and the offset argument will point to the + * start of the region that was found. Otherwise this function will + * return false and keep the offset argument unchanged. + */ +STATIC bool +xfs_lookup_buffer_offset( + struct page *page, + loff_t *offset, + unsigned int type) +{ + loff_t lastoff = page_offset(page); + bool found = false; + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + /* + * Unwritten extents that have data in the page + * cache covering them can be identified by the + * BH_Unwritten state flag. Pages with multiple + * buffers might have a mix of holes, data and + * unwritten extents - any buffer with valid + * data in it should have BH_Uptodate flag set + * on it. + */ + if (buffer_unwritten(bh) || + buffer_uptodate(bh)) { + if (type == DATA_OFF) + found = true; + } else { + if (type == HOLE_OFF) + found = true; + } + + if (found) { + *offset = lastoff; + break; + } + lastoff += bh->b_size; + } while ((bh = bh->b_this_page) != head); + + return found; +} + +/* + * This routine is called to find out and return a data or hole offset + * from the page cache for unwritten extents according to the desired + * type for xfs_seek_data() or xfs_seek_hole(). + * + * The argument offset is used to tell where we start to search from the + * page cache. Map is used to figure out the end points of the range to + * lookup pages. + * + * Return true if the desired type of offset was found, and the argument + * offset is filled with that address. Otherwise, return false and keep + * offset unchanged. + */ +STATIC bool +xfs_find_get_desired_pgoff( + struct inode *inode, + struct xfs_bmbt_irec *map, + unsigned int type, + loff_t *offset) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct pagevec pvec; + pgoff_t index; + pgoff_t end; + loff_t endoff; + loff_t startoff = *offset; + loff_t lastoff = startoff; + bool found = false; + + pagevec_init(&pvec, 0); + + index = startoff >> PAGE_CACHE_SHIFT; + endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); + end = endoff >> PAGE_CACHE_SHIFT; + do { + int want; + unsigned nr_pages; + unsigned int i; + + want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, + want); + /* + * No page mapped into given range. If we are searching holes + * and if this is the first time we got into the loop, it means + * that the given offset is landed in a hole, return it. + * + * If we have already stepped through some block buffers to find + * holes but they all contains data. In this case, the last + * offset is already updated and pointed to the end of the last + * mapped page, if it does not reach the endpoint to search, + * that means there should be a hole between them. + */ + if (nr_pages == 0) { + /* Data search found nothing */ + if (type == DATA_OFF) + break; + + ASSERT(type == HOLE_OFF); + if (lastoff == startoff || lastoff < endoff) { + found = true; + *offset = lastoff; + } + break; + } + + /* + * At lease we found one page. If this is the first time we + * step into the loop, and if the first page index offset is + * greater than the given search offset, a hole was found. + */ + if (type == HOLE_OFF && lastoff == startoff && + lastoff < page_offset(pvec.pages[0])) { + found = true; + break; + } + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + loff_t b_offset; + + /* + * At this point, the page may be truncated or + * invalidated (changing page->mapping to NULL), + * or even swizzled back from swapper_space to tmpfs + * file mapping. However, page->index will not change + * because we have a reference on the page. + * + * Searching done if the page index is out of range. + * If the current offset is not reaches the end of + * the specified search range, there should be a hole + * between them. + */ + if (page->index > end) { + if (type == HOLE_OFF && lastoff < endoff) { + *offset = lastoff; + found = true; + } + goto out; + } + + lock_page(page); + /* + * Page truncated or invalidated(page->mapping == NULL). + * We can freely skip it and proceed to check the next + * page. + */ + if (unlikely(page->mapping != inode->i_mapping)) { + unlock_page(page); + continue; + } + + if (!page_has_buffers(page)) { + unlock_page(page); + continue; + } + + found = xfs_lookup_buffer_offset(page, &b_offset, type); + if (found) { + /* + * The found offset may be less than the start + * point to search if this is the first time to + * come here. + */ + *offset = max_t(loff_t, startoff, b_offset); + unlock_page(page); + goto out; + } + + /* + * We either searching data but nothing was found, or + * searching hole but found a data buffer. In either + * case, probably the next page contains the desired + * things, update the last offset to it so. + */ + lastoff = page_offset(page) + PAGE_SIZE; + unlock_page(page); + } + + /* + * The number of returned pages less than our desired, search + * done. In this case, nothing was found for searching data, + * but we found a hole behind the last offset. + */ + if (nr_pages < want) { + if (type == HOLE_OFF) { + *offset = lastoff; + found = true; + } + break; + } + + index = pvec.pages[i - 1]->index + 1; + pagevec_release(&pvec); + } while (index <= end); + +out: + pagevec_release(&pvec); + return found; +} + STATIC loff_t xfs_seek_data( struct file *file, -- cgit v1.2.3 From 52f1acc8b56a333fbc7218711c3fa2fb3bf78b92 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Tue, 21 Aug 2012 17:12:07 +0800 Subject: xfs: xfs_seek_data() refinement with unwritten extents check up from page cache xfs_seek_data() refinement with unwritten extents check up from page cache. Signed-off-by: Jie Liu Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_file.c | 72 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index d78a746b6c7c..3f9107431dfb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1186,8 +1186,6 @@ xfs_seek_data( struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - struct xfs_bmbt_irec map[2]; - int nmap = 2; loff_t uninitialized_var(offset); xfs_fsize_t isize; xfs_fileoff_t fsbno; @@ -1203,36 +1201,74 @@ xfs_seek_data( goto out_unlock; } - fsbno = XFS_B_TO_FSBT(mp, start); - /* * Try to read extents from the first block indicated * by fsbno to the end block of the file. */ + fsbno = XFS_B_TO_FSBT(mp, start); end = XFS_B_TO_FSB(mp, isize); + for (;;) { + struct xfs_bmbt_irec map[2]; + int nmap = 2; + unsigned int i; - error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, - XFS_BMAPI_ENTIRE); - if (error) - goto out_unlock; + error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, + XFS_BMAPI_ENTIRE); + if (error) + goto out_unlock; - /* - * Treat unwritten extent as data extent since it might - * contains dirty data in page cache. - */ - if (map[0].br_startblock != HOLESTARTBLOCK) { - offset = max_t(loff_t, start, - XFS_FSB_TO_B(mp, map[0].br_startoff)); - } else { + /* No extents at given offset, must be beyond EOF */ + if (nmap == 0) { + error = ENXIO; + goto out_unlock; + } + + for (i = 0; i < nmap; i++) { + offset = max_t(loff_t, start, + XFS_FSB_TO_B(mp, map[i].br_startoff)); + + /* Landed in a data extent */ + if (map[i].br_startblock == DELAYSTARTBLOCK || + (map[i].br_state == XFS_EXT_NORM && + !isnullstartblock(map[i].br_startblock))) + goto out; + + /* + * Landed in an unwritten extent, try to search data + * from page cache. + */ + if (map[i].br_state == XFS_EXT_UNWRITTEN) { + if (xfs_find_get_desired_pgoff(inode, &map[i], + DATA_OFF, &offset)) + goto out; + } + } + + /* + * map[0] is hole or its an unwritten extent but + * without data in page cache. Probably means that + * we are reading after EOF if nothing in map[1]. + */ if (nmap == 1) { error = ENXIO; goto out_unlock; } - offset = max_t(loff_t, start, - XFS_FSB_TO_B(mp, map[1].br_startoff)); + ASSERT(i > 1); + + /* + * Nothing was found, proceed to the next round of search + * if reading offset not beyond or hit EOF. + */ + fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; + start = XFS_FSB_TO_B(mp, fsbno); + if (start >= isize) { + error = ENXIO; + goto out_unlock; + } } +out: if (offset != file->f_pos) file->f_pos = offset; -- cgit v1.2.3 From b686d1f79acb65c6a34473c15fcfa2ee54aed8e2 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Tue, 21 Aug 2012 17:12:18 +0800 Subject: xfs: xfs_seek_hole() refinement with hole searching from page cache for unwritten extents xfs_seek_hole() refinement with hole searching from page cache for unwritten extent. Signed-off-by: Jie Liu Reviewed-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_file.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 3f9107431dfb..1eaeb8be3aae 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1289,9 +1289,9 @@ xfs_seek_hole( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; loff_t uninitialized_var(offset); - loff_t holeoff; xfs_fsize_t isize; xfs_fileoff_t fsbno; + xfs_filblks_t end; uint lock; int error; @@ -1307,21 +1307,77 @@ xfs_seek_hole( } fsbno = XFS_B_TO_FSBT(mp, start); - error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK); - if (error) - goto out_unlock; + end = XFS_B_TO_FSB(mp, isize); + + for (;;) { + struct xfs_bmbt_irec map[2]; + int nmap = 2; + unsigned int i; + + error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, + XFS_BMAPI_ENTIRE); + if (error) + goto out_unlock; + + /* No extents at given offset, must be beyond EOF */ + if (nmap == 0) { + error = ENXIO; + goto out_unlock; + } + + for (i = 0; i < nmap; i++) { + offset = max_t(loff_t, start, + XFS_FSB_TO_B(mp, map[i].br_startoff)); + + /* Landed in a hole */ + if (map[i].br_startblock == HOLESTARTBLOCK) + goto out; + + /* + * Landed in an unwritten extent, try to search hole + * from page cache. + */ + if (map[i].br_state == XFS_EXT_UNWRITTEN) { + if (xfs_find_get_desired_pgoff(inode, &map[i], + HOLE_OFF, &offset)) + goto out; + } + } - holeoff = XFS_FSB_TO_B(mp, fsbno); - if (holeoff <= start) - offset = start; - else { /* - * xfs_bmap_first_unused() could return a value bigger than - * isize if there are no more holes past the supplied offset. + * map[0] contains data or its unwritten but contains + * data in page cache, probably means that we are + * reading after EOF. We should fix offset to point + * to the end of the file(i.e., there is an implicit + * hole at the end of any file). */ - offset = min_t(loff_t, holeoff, isize); + if (nmap == 1) { + offset = isize; + break; + } + + ASSERT(i > 1); + + /* + * Both mappings contains data, proceed to the next round of + * search if the current reading offset not beyond or hit EOF. + */ + fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; + start = XFS_FSB_TO_B(mp, fsbno); + if (start >= isize) { + offset = isize; + break; + } } +out: + /* + * At this point, we must have found a hole. However, the returned + * offset may be bigger than the file size as it may be aligned to + * page boundary for unwritten extents, we need to deal with this + * situation in particular. + */ + offset = min_t(loff_t, offset, isize); if (offset != file->f_pos) file->f_pos = offset; -- cgit v1.2.3 From 38f38657444d15e1a8574eae80ed3de9f501737a Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 23 Aug 2012 16:53:28 -0400 Subject: xattr: extract simple_xattr code from tmpfs Extract in-memory xattr APIs from tmpfs. Will be used by cgroup. $ size vmlinux.o text data bss dec hex filename 4658782 880729 5195032 10734543 a3cbcf vmlinux.o $ size vmlinux.o text data bss dec hex filename 4658957 880729 5195032 10734718 a3cc7e vmlinux.o v7: - checkpatch warnings fixed - Implement the changes requested by Hugh Dickins: - make simple_xattrs_init and simple_xattrs_free inline - get rid of locking and list reinitialization in simple_xattrs_free, they're not needed v6: - no changes v5: - no changes v4: - move simple_xattrs_free() to fs/xattr.c v3: - in kmem_xattrs_free(), reinitialize the list - use simple_xattr_* prefix - introduce simple_xattr_add() to prevent direct list usage Original-patch-by: Li Zefan Cc: Li Zefan Cc: Hillf Danton Cc: Lennart Poettering Acked-by: Hugh Dickins Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski Signed-off-by: Tejun Heo --- fs/xattr.c | 166 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/shmem_fs.h | 3 +- include/linux/xattr.h | 48 +++++++++++++ mm/shmem.c | 171 ++++------------------------------------------- 4 files changed, 229 insertions(+), 159 deletions(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index 4d45b7189e7e..e17e773517ef 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -791,3 +791,169 @@ EXPORT_SYMBOL(generic_getxattr); EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); + +/* + * Allocate new xattr and copy in the value; but leave the name to callers. + */ +struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) +{ + struct simple_xattr *new_xattr; + size_t len; + + /* wrap around? */ + len = sizeof(*new_xattr) + size; + if (len <= sizeof(*new_xattr)) + return NULL; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return NULL; + + new_xattr->size = size; + memcpy(new_xattr->value, value, size); + return new_xattr; +} + +/* + * xattr GET operation for in-memory/pseudo filesystems + */ +int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, + void *buffer, size_t size) +{ + struct simple_xattr *xattr; + int ret = -ENODATA; + + spin_lock(&xattrs->lock); + list_for_each_entry(xattr, &xattrs->head, list) { + if (strcmp(name, xattr->name)) + continue; + + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, xattr->size); + } + break; + } + spin_unlock(&xattrs->lock); + return ret; +} + +static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + struct simple_xattr *xattr; + struct simple_xattr *new_xattr = NULL; + int err = 0; + + /* value == NULL means remove */ + if (value) { + new_xattr = simple_xattr_alloc(value, size); + if (!new_xattr) + return -ENOMEM; + + new_xattr->name = kstrdup(name, GFP_KERNEL); + if (!new_xattr->name) { + kfree(new_xattr); + return -ENOMEM; + } + } + + spin_lock(&xattrs->lock); + list_for_each_entry(xattr, &xattrs->head, list) { + if (!strcmp(name, xattr->name)) { + if (flags & XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(&xattr->list, &new_xattr->list); + } else { + list_del(&xattr->list); + } + goto out; + } + } + if (flags & XATTR_REPLACE) { + xattr = new_xattr; + err = -ENODATA; + } else { + list_add(&new_xattr->list, &xattrs->head); + xattr = NULL; + } +out: + spin_unlock(&xattrs->lock); + if (xattr) { + kfree(xattr->name); + kfree(xattr); + } + return err; + +} + +/* + * xattr SET operation for in-memory/pseudo filesystems + */ +int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + if (size == 0) + value = ""; /* empty EA, do not remove */ + return __simple_xattr_set(xattrs, name, value, size, flags); +} + +/* + * xattr REMOVE operation for in-memory/pseudo filesystems + */ +int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name) +{ + return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE); +} + +static bool xattr_is_trusted(const char *name) +{ + return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); +} + +/* + * xattr LIST operation for in-memory/pseudo filesystems + */ +ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer, + size_t size) +{ + bool trusted = capable(CAP_SYS_ADMIN); + struct simple_xattr *xattr; + size_t used = 0; + + spin_lock(&xattrs->lock); + list_for_each_entry(xattr, &xattrs->head, list) { + size_t len; + + /* skip "trusted." attributes for unprivileged callers */ + if (!trusted && xattr_is_trusted(xattr->name)) + continue; + + len = strlen(xattr->name) + 1; + used += len; + if (buffer) { + if (size < used) { + used = -ERANGE; + break; + } + memcpy(buffer, xattr->name, len); + buffer += len; + } + } + spin_unlock(&xattrs->lock); + + return used; +} + +void simple_xattr_list_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr) +{ + spin_lock(&xattrs->lock); + list_add(&new_xattr->list, &xattrs->head); + spin_unlock(&xattrs->lock); +} diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index bef2cf00b3be..30aa0dc60d75 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -5,6 +5,7 @@ #include #include #include +#include /* inode in-kernel data */ @@ -18,7 +19,7 @@ struct shmem_inode_info { }; struct shared_policy policy; /* NUMA memory alloc policy */ struct list_head swaplist; /* chain of maybes on swap */ - struct list_head xattr_list; /* list of shmem_xattr */ + struct simple_xattrs xattrs; /* list of xattrs */ struct inode vfs_inode; }; diff --git a/include/linux/xattr.h b/include/linux/xattr.h index e5d122031542..2ace7a60316d 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -59,7 +59,9 @@ #ifdef __KERNEL__ +#include #include +#include struct inode; struct dentry; @@ -96,6 +98,52 @@ ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name, const char *value, size_t size, gfp_t flags); + +struct simple_xattrs { + struct list_head head; + spinlock_t lock; +}; + +struct simple_xattr { + struct list_head list; + char *name; + size_t size; + char value[0]; +}; + +/* + * initialize the simple_xattrs structure + */ +static inline void simple_xattrs_init(struct simple_xattrs *xattrs) +{ + INIT_LIST_HEAD(&xattrs->head); + spin_lock_init(&xattrs->lock); +} + +/* + * free all the xattrs + */ +static inline void simple_xattrs_free(struct simple_xattrs *xattrs) +{ + struct simple_xattr *xattr, *node; + + list_for_each_entry_safe(xattr, node, &xattrs->head, list) { + kfree(xattr->name); + kfree(xattr); + } +} + +struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); +int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, + void *buffer, size_t size); +int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags); +int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name); +ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer, + size_t size); +void simple_xattr_list_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr); + #endif /* __KERNEL__ */ #endif /* _LINUX_XATTR_H */ diff --git a/mm/shmem.c b/mm/shmem.c index d4e184e2a38e..d3752110c8c7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -77,13 +77,6 @@ static struct vfsmount *shm_mnt; /* Symlink up to this size is kmalloc'ed instead of using a swappable page */ #define SHORT_SYMLINK_LEN 128 -struct shmem_xattr { - struct list_head list; /* anchored by shmem_inode_info->xattr_list */ - char *name; /* xattr name */ - size_t size; - char value[0]; -}; - /* * shmem_fallocate and shmem_writepage communicate via inode->i_private * (with i_mutex making sure that it has only one user at a time): @@ -636,7 +629,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) static void shmem_evict_inode(struct inode *inode) { struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_xattr *xattr, *nxattr; if (inode->i_mapping->a_ops == &shmem_aops) { shmem_unacct_size(info->flags, inode->i_size); @@ -650,10 +642,7 @@ static void shmem_evict_inode(struct inode *inode) } else kfree(info->symlink); - list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { - kfree(xattr->name); - kfree(xattr); - } + simple_xattrs_free(&info->xattrs); BUG_ON(inode->i_blocks); shmem_free_inode(inode->i_sb); clear_inode(inode); @@ -1377,7 +1366,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode spin_lock_init(&info->lock); info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); - INIT_LIST_HEAD(&info->xattr_list); + simple_xattrs_init(&info->xattrs); cache_no_acl(inode); switch (mode & S_IFMT) { @@ -2059,28 +2048,6 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co * filesystem level, though. */ -/* - * Allocate new xattr and copy in the value; but leave the name to callers. - */ -static struct shmem_xattr *shmem_xattr_alloc(const void *value, size_t size) -{ - struct shmem_xattr *new_xattr; - size_t len; - - /* wrap around? */ - len = sizeof(*new_xattr) + size; - if (len <= sizeof(*new_xattr)) - return NULL; - - new_xattr = kmalloc(len, GFP_KERNEL); - if (!new_xattr) - return NULL; - - new_xattr->size = size; - memcpy(new_xattr->value, value, size); - return new_xattr; -} - /* * Callback for security_inode_init_security() for acquiring xattrs. */ @@ -2090,11 +2057,11 @@ static int shmem_initxattrs(struct inode *inode, { struct shmem_inode_info *info = SHMEM_I(inode); const struct xattr *xattr; - struct shmem_xattr *new_xattr; + struct simple_xattr *new_xattr; size_t len; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - new_xattr = shmem_xattr_alloc(xattr->value, xattr->value_len); + new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); if (!new_xattr) return -ENOMEM; @@ -2111,91 +2078,12 @@ static int shmem_initxattrs(struct inode *inode, memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, xattr->name, len); - spin_lock(&info->lock); - list_add(&new_xattr->list, &info->xattr_list); - spin_unlock(&info->lock); + simple_xattr_list_add(&info->xattrs, new_xattr); } return 0; } -static int shmem_xattr_get(struct dentry *dentry, const char *name, - void *buffer, size_t size) -{ - struct shmem_inode_info *info; - struct shmem_xattr *xattr; - int ret = -ENODATA; - - info = SHMEM_I(dentry->d_inode); - - spin_lock(&info->lock); - list_for_each_entry(xattr, &info->xattr_list, list) { - if (strcmp(name, xattr->name)) - continue; - - ret = xattr->size; - if (buffer) { - if (size < xattr->size) - ret = -ERANGE; - else - memcpy(buffer, xattr->value, xattr->size); - } - break; - } - spin_unlock(&info->lock); - return ret; -} - -static int shmem_xattr_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_xattr *xattr; - struct shmem_xattr *new_xattr = NULL; - int err = 0; - - /* value == NULL means remove */ - if (value) { - new_xattr = shmem_xattr_alloc(value, size); - if (!new_xattr) - return -ENOMEM; - - new_xattr->name = kstrdup(name, GFP_KERNEL); - if (!new_xattr->name) { - kfree(new_xattr); - return -ENOMEM; - } - } - - spin_lock(&info->lock); - list_for_each_entry(xattr, &info->xattr_list, list) { - if (!strcmp(name, xattr->name)) { - if (flags & XATTR_CREATE) { - xattr = new_xattr; - err = -EEXIST; - } else if (new_xattr) { - list_replace(&xattr->list, &new_xattr->list); - } else { - list_del(&xattr->list); - } - goto out; - } - } - if (flags & XATTR_REPLACE) { - xattr = new_xattr; - err = -ENODATA; - } else { - list_add(&new_xattr->list, &info->xattr_list); - xattr = NULL; - } -out: - spin_unlock(&info->lock); - if (xattr) - kfree(xattr->name); - kfree(xattr); - return err; -} - static const struct xattr_handler *shmem_xattr_handlers[] = { #ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, @@ -2226,6 +2114,7 @@ static int shmem_xattr_validate(const char *name) static ssize_t shmem_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) { + struct shmem_inode_info *info = SHMEM_I(dentry->d_inode); int err; /* @@ -2240,12 +2129,13 @@ static ssize_t shmem_getxattr(struct dentry *dentry, const char *name, if (err) return err; - return shmem_xattr_get(dentry, name, buffer, size); + return simple_xattr_get(&info->xattrs, name, buffer, size); } static int shmem_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { + struct shmem_inode_info *info = SHMEM_I(dentry->d_inode); int err; /* @@ -2260,15 +2150,12 @@ static int shmem_setxattr(struct dentry *dentry, const char *name, if (err) return err; - if (size == 0) - value = ""; /* empty EA, do not remove */ - - return shmem_xattr_set(dentry->d_inode, name, value, size, flags); - + return simple_xattr_set(&info->xattrs, name, value, size, flags); } static int shmem_removexattr(struct dentry *dentry, const char *name) { + struct shmem_inode_info *info = SHMEM_I(dentry->d_inode); int err; /* @@ -2283,45 +2170,13 @@ static int shmem_removexattr(struct dentry *dentry, const char *name) if (err) return err; - return shmem_xattr_set(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); -} - -static bool xattr_is_trusted(const char *name) -{ - return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); + return simple_xattr_remove(&info->xattrs, name); } static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) { - bool trusted = capable(CAP_SYS_ADMIN); - struct shmem_xattr *xattr; - struct shmem_inode_info *info; - size_t used = 0; - - info = SHMEM_I(dentry->d_inode); - - spin_lock(&info->lock); - list_for_each_entry(xattr, &info->xattr_list, list) { - size_t len; - - /* skip "trusted." attributes for unprivileged callers */ - if (!trusted && xattr_is_trusted(xattr->name)) - continue; - - len = strlen(xattr->name) + 1; - used += len; - if (buffer) { - if (size < used) { - used = -ERANGE; - break; - } - memcpy(buffer, xattr->name, len); - buffer += len; - } - } - spin_unlock(&info->lock); - - return used; + struct shmem_inode_info *info = SHMEM_I(dentry->d_inode); + return simple_xattr_list(&info->xattrs, buffer, size); } #endif /* CONFIG_TMPFS_XATTR */ -- cgit v1.2.3 From 82aceae4f0d42f03d9ad7d1e90389e731153898f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 27 Aug 2012 13:32:15 -0700 Subject: debugfs: more tightly restrict default mount mode Since the debugfs is mostly only used by root, make the default mount mode 0700. Most system owners do not need a more permissive value, but they can choose to weaken the restrictions via their fstab. Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/debugfs.txt | 4 ++-- fs/debugfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 7a34f827989c..3a863f692728 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -15,8 +15,8 @@ Debugfs is typically mounted with a command like: mount -t debugfs none /sys/kernel/debug (Or an equivalent /etc/fstab line). -The debugfs root directory is accessible by anyone by default. To -restrict access to the tree the "uid", "gid" and "mode" mount +The debugfs root directory is accessible only to the root user by +default. To change access to the tree the "uid", "gid" and "mode" mount options can be used. Note that the debugfs API is exported GPL-only to modules. diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 2c9fafbe8425..6393fd61d5c4 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -28,7 +28,7 @@ #include #include -#define DEBUGFS_DEFAULT_MODE 0755 +#define DEBUGFS_DEFAULT_MODE 0700 static struct vfsmount *debugfs_mount; static int debugfs_mount_count; -- cgit v1.2.3 From aa2ffd06168e25689e0eb9662bf4595ba2bbac14 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 26 Jul 2012 03:40:35 -0600 Subject: Btrfs: fix a misplaced address operator in a condition This should obviously not be "if (&flag)" but "if (flag)". Signed-off-by: Stefan Behrens --- fs/btrfs/locking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index a44eff074805..2a1762c66041 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) { if (eb->lock_nested) { read_lock(&eb->lock); - if (&eb->lock_nested && current->pid == eb->lock_owner) { + if (eb->lock_nested && current->pid == eb->lock_owner) { read_unlock(&eb->lock); return; } -- cgit v1.2.3 From 5986802c2fcc754040bb7ed95f30bb16c4a843b7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:16:10 -0600 Subject: Btrfs: fix some error codes in btrfs_qgroup_inherit() These are returning zero when it should be returning a negative error code. Signed-off-by: Dan Carpenter --- fs/btrfs/qgroup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bc424ae5a81a..229ef8927e6b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1369,8 +1369,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); - if (!srcgroup) + if (!srcgroup) { + ret = -EINVAL; goto unlock; + } dstgroup->rfer = srcgroup->rfer - level_size; dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; srcgroup->excl = level_size; @@ -1379,8 +1381,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, qgroup_dirty(fs_info, srcgroup); } - if (!inherit) + if (!inherit) { + ret = -EINVAL; goto unlock; + } i_qgroups = (u64 *)(inherit + 1); for (i = 0; i < inherit->num_qgroups; ++i) { -- cgit v1.2.3 From 57a5a882031dba5cb7bc7ebc955b897498365fe2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:15:43 -0600 Subject: Btrfs: checking for NULL instead of IS_ERR add_qgroup_rb() never returns NULL, only error pointers. Signed-off-by: Dan Carpenter --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 229ef8927e6b..38b42e7bc91d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1364,8 +1364,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, spin_lock(&fs_info->qgroup_lock); dstgroup = add_qgroup_rb(fs_info, objectid); - if (!dstgroup) + if (IS_ERR(dstgroup)) { + ret = PTR_ERR(dstgroup); goto unlock; + } if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); -- cgit v1.2.3 From 55e591ffde38e0088b022129e035e18a8d04c7e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:15:15 -0600 Subject: Btrfs: unlock on error in btrfs_delalloc_reserve_metadata() We should release this mutex before returning the error code. Signed-off-by: Dan Carpenter --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4e1b153b7c47..45c69c4184c9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4571,8 +4571,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (root->fs_info->quota_enabled) { ret = btrfs_qgroup_reserve(root, num_bytes + nr_extents * root->leafsize); - if (ret) + if (ret) { + mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); return ret; + } } ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); -- cgit v1.2.3 From dadd1105ca9a1e506c678e8e410e9623efdda821 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jul 2012 02:10:44 -0600 Subject: Btrfs: fix some endian bugs handling the root times "trans->transid" is cpu endian but we want to store the data as little endian. "item->ctime.nsec" is only 32 bits, not 64. Signed-off-by: Dan Carpenter --- fs/btrfs/ioctl.c | 2 +- fs/btrfs/root-tree.c | 4 ++-- fs/btrfs/transaction.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 43f0012016e3..a1fbca0a1003 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root, uuid_le_gen(&new_uuid); memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); - root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); + root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); root_item.ctime = root_item.otime; btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_otransid(&root_item, trans->transid); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6bb465cca20f..10d8e4d88071 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct timespec ct = CURRENT_TIME; spin_lock(&root->root_times_lock); - item->ctransid = trans->transid; + item->ctransid = cpu_to_le64(trans->transid); item->ctime.sec = cpu_to_le64(ct.tv_sec); - item->ctime.nsec = cpu_to_le64(ct.tv_nsec); + item->ctime.nsec = cpu_to_le32(ct.tv_nsec); spin_unlock(&root->root_times_lock); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7ac7cdcc294e..7208ada41e0e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1061,7 +1061,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(new_root_item->parent_uuid, root->root_item.uuid, BTRFS_UUID_SIZE); new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); - new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); + new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); btrfs_set_root_otransid(new_root_item, trans->transid); memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); -- cgit v1.2.3 From eb838e73dc2121d2bae47d5678952cd7d48793b5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 31 Jul 2012 16:28:48 -0400 Subject: Btrfs: lock extents as we map them in DIO A deadlock in xfstests 113 was uncovered by commit d187663ef24cd3d033f0cbf2867e70b36a3a90b8 This is because we would not return EIOCBQUEUED for short AIO reads, instead we'd wait for the DIO to complete and then return the amount of data we transferred, which would allow our stuff to unlock the remaning amount. But with this change this no longer happens, so if we have a short AIO read (for example if we try to read past EOF), we could leave the section from EOF to the end of where we tried to read locked. Fixing this is tricky since there is no clear way to know exactly how much data DIO truly submitted for IO, so to make this less hard on ourselves and less combersome we need to lock the extents as we try to map them, and then we unlock any areas we didn't actually map. This makes us completely safe from deadlocks and reliance on a particular behavior of the DIO code. This also lays the groundwork for allowing us to use the normal csum storage method for reads which means we can remove an allocation. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 256 +++++++++++++++++++++++++++---------------------------- 1 file changed, 127 insertions(+), 129 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dac1fc21d809..09182449cbdf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5773,18 +5773,109 @@ out: return ret; } +static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, + struct extent_state **cached_state, int writing) +{ + struct btrfs_ordered_extent *ordered; + int ret = 0; + + while (1) { + lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, + 0, cached_state); + /* + * We're concerned with the entire range that we're going to be + * doing DIO to, so we need to make sure theres no ordered + * extents in this range. + */ + ordered = btrfs_lookup_ordered_range(inode, lockstart, + lockend - lockstart + 1); + + /* + * We need to make sure there are no buffered pages in this + * range either, we could have raced between the invalidate in + * generic_file_direct_write and locking the extent. The + * invalidate needs to happen so that reads after a write do not + * get stale data. + */ + if (!ordered && (!writing || + !test_range_bit(&BTRFS_I(inode)->io_tree, + lockstart, lockend, EXTENT_UPTODATE, 0, + *cached_state))) + break; + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, + cached_state, GFP_NOFS); + + if (ordered) { + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } else { + /* Screw you mmap */ + ret = filemap_write_and_wait_range(inode->i_mapping, + lockstart, + lockend); + if (ret) + break; + + /* + * If we found a page that couldn't be invalidated just + * fall back to buffered. + */ + ret = invalidate_inode_pages2_range(inode->i_mapping, + lockstart >> PAGE_CACHE_SHIFT, + lockend >> PAGE_CACHE_SHIFT); + if (ret) + break; + } + + cond_resched(); + } + + return ret; +} + static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_state *cached_state = NULL; u64 start = iblock << inode->i_blkbits; + u64 lockstart, lockend; u64 len = bh_result->b_size; struct btrfs_trans_handle *trans; + int unlock_bits = EXTENT_LOCKED; + int ret; + + lockstart = start; + lockend = start + len - 1; + if (create) { + ret = btrfs_delalloc_reserve_space(inode, len); + if (ret) + return ret; + unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; + } + + /* + * If this errors out it's because we couldn't invalidate pagecache for + * this range and we need to fallback to buffered. + */ + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) + return -ENOTBLK; + + if (create) { + ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, EXTENT_DELALLOC, NULL, + &cached_state, GFP_NOFS); + if (ret) + goto unlock_err; + } em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - if (IS_ERR(em)) - return PTR_ERR(em); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto unlock_err; + } /* * Ok for INLINE and COMPRESSED extents we need to fallback on buffered @@ -5803,17 +5894,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || em->block_start == EXTENT_MAP_INLINE) { free_extent_map(em); - return -ENOTBLK; + ret = -ENOTBLK; + goto unlock_err; } /* Just a good old fashioned hole, return */ if (!create && (em->block_start == EXTENT_MAP_HOLE || test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { free_extent_map(em); - /* DIO will do one hole at a time, so just unlock a sector */ - unlock_extent(&BTRFS_I(inode)->io_tree, start, - start + root->sectorsize - 1); - return 0; + ret = 0; + goto unlock_err; } /* @@ -5826,8 +5916,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * */ if (!create) { - len = em->len - (start - em->start); - goto map; + len = min(len, em->len - (start - em->start)); + lockstart = start + len; + goto unlock; } if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || @@ -5859,7 +5950,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, btrfs_end_transaction(trans, root); if (ret) { free_extent_map(em); - return ret; + goto unlock_err; } goto unlock; } @@ -5872,14 +5963,12 @@ must_cow: */ len = bh_result->b_size; em = btrfs_new_extent_direct(inode, em, start, len); - if (IS_ERR(em)) - return PTR_ERR(em); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto unlock_err; + } len = min(len, em->len - (start - em->start)); unlock: - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, - EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, - 0, NULL, GFP_NOFS); -map: bh_result->b_blocknr = (em->block_start + (start - em->start)) >> inode->i_blkbits; bh_result->b_size = len; @@ -5897,9 +5986,28 @@ map: i_size_write(inode, start + len); } + /* + * In the case of write we need to clear and unlock the entire range, + * in the case of read we need to unlock only the end area that we + * aren't using if there is any left over space. + */ + if (lockstart < lockend) + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, + unlock_bits, 1, 0, &cached_state, GFP_NOFS); + else + free_extent_state(cached_state); + free_extent_map(em); return 0; + +unlock_err: + if (create) + unlock_bits |= EXTENT_DO_ACCOUNTING; + + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, + unlock_bits, 1, 0, &cached_state, GFP_NOFS); + return ret; } struct btrfs_dio_private { @@ -6340,132 +6448,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io out: return retval; } + static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct btrfs_ordered_extent *ordered; - struct extent_state *cached_state = NULL; - u64 lockstart, lockend; - ssize_t ret; - int writing = rw & WRITE; - int write_bits = 0; - size_t count = iov_length(iov, nr_segs); if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, - offset, nr_segs)) { + offset, nr_segs)) return 0; - } - - lockstart = offset; - lockend = offset + count - 1; - - if (writing) { - ret = btrfs_delalloc_reserve_space(inode, count); - if (ret) - goto out; - } - while (1) { - lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, - 0, &cached_state); - /* - * We're concerned with the entire range that we're going to be - * doing DIO to, so we need to make sure theres no ordered - * extents in this range. - */ - ordered = btrfs_lookup_ordered_range(inode, lockstart, - lockend - lockstart + 1); - - /* - * We need to make sure there are no buffered pages in this - * range either, we could have raced between the invalidate in - * generic_file_direct_write and locking the extent. The - * invalidate needs to happen so that reads after a write do not - * get stale data. - */ - if (!ordered && (!writing || - !test_range_bit(&BTRFS_I(inode)->io_tree, - lockstart, lockend, EXTENT_UPTODATE, 0, - cached_state))) - break; - - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, - &cached_state, GFP_NOFS); - - if (ordered) { - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } else { - /* Screw you mmap */ - ret = filemap_write_and_wait_range(file->f_mapping, - lockstart, - lockend); - if (ret) - goto out; - - /* - * If we found a page that couldn't be invalidated just - * fall back to buffered. - */ - ret = invalidate_inode_pages2_range(file->f_mapping, - lockstart >> PAGE_CACHE_SHIFT, - lockend >> PAGE_CACHE_SHIFT); - if (ret) { - if (ret == -EBUSY) - ret = 0; - goto out; - } - } - - cond_resched(); - } - - /* - * we don't use btrfs_set_extent_delalloc because we don't want - * the dirty or uptodate bits - */ - if (writing) { - write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; - ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - EXTENT_DELALLOC, NULL, &cached_state, - GFP_NOFS); - if (ret) { - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, - lockend, EXTENT_LOCKED | write_bits, - 1, 0, &cached_state, GFP_NOFS); - goto out; - } - } - - free_extent_state(cached_state); - cached_state = NULL; - - ret = __blockdev_direct_IO(rw, iocb, inode, + return __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, 0); - - if (ret < 0 && ret != -EIOCBQUEUED) { - clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, - offset + iov_length(iov, nr_segs) - 1, - EXTENT_LOCKED | write_bits, 1, 0, - &cached_state, GFP_NOFS); - } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { - /* - * We're falling back to buffered, unlock the section we didn't - * do IO on. - */ - clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, - offset + iov_length(iov, nr_segs) - 1, - EXTENT_LOCKED | write_bits, 1, 0, - &cached_state, GFP_NOFS); - } -out: - free_extent_state(cached_state); - return ret; } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, -- cgit v1.2.3 From 3627bf4503b504077332c13496cb1bd54713bcbb Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 1 Aug 2012 04:28:01 -0600 Subject: Btrfs: fix that error value is changed by mistake In iterate_inodes_from_logical() the error result from extent_from_logical() is patched by mistake. Typically ENOENT is patched to EINVAL because (-ENOENT & BTRFS_EXTENT_FLAG_TREE_BLOCK) evaluates to true. Signed-off-by: Stefan Behrens --- fs/btrfs/backref.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a256f3b2a845..ff6475f409d6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, ret = extent_from_logical(fs_info, logical, path, &found_key); btrfs_release_path(path); - if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) - ret = -EINVAL; if (ret < 0) return ret; + if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) + return -EINVAL; extent_item_pos = logical - found_key.objectid; ret = iterate_extent_inodes(fs_info, found_key.objectid, -- cgit v1.2.3 From aa9ddcd4b5557102fa25695c11904f249b4dec49 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 2 Aug 2012 10:22:20 -0400 Subject: Btrfs: do not use missing devices when showing devname If you do the following mkfs.btrfs /dev/sdb /dev/sdc rmmod btrfs dd if=/dev/zero of=/dev/sdb bs=1M count=1 mount -o degraded /dev/sdc /mnt/btrfs-test the box will panic trying to deref the name for the missing dev since it is the lower numbered devid. So fix show_devname to not use missing devices. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 75ee2c7791f0..2e06f124f284 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1505,6 +1505,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) while (cur_devices) { head = &cur_devices->devices; list_for_each_entry(dev, head, dev_list) { + if (dev->missing) + continue; if (!first_dev || dev->devid < first_dev->devid) first_dev = dev; } -- cgit v1.2.3 From 99f5944b8477914406173b47b4f261356286730b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 2 Aug 2012 10:23:59 -0400 Subject: Btrfs: do not strdup non existent strings When we close devices we add back empty devices for some reason that escapes me. In the case of a missing dev we don't allocate an rcu_string for it's name, so check to see if the device has a name and if it doesn't don't bother strdup()'ing it. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/volumes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b8708f994e67..3b394503bd4e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -569,9 +569,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) memcpy(new_device, device, sizeof(*new_device)); /* Safe because we are under uuid_mutex */ - name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ - rcu_assign_pointer(new_device->name, name); + if (device->name) { + name = rcu_string_strdup(device->name->str, GFP_NOFS); + BUG_ON(device->name && !name); /* -ENOMEM */ + rcu_assign_pointer(new_device->name, name); + } new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; -- cgit v1.2.3 From c329861da40623cd838b8c9ee31a850242fd88cf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 3 Aug 2012 16:49:19 -0400 Subject: Btrfs: don't allocate a seperate csums array for direct reads We've been allocating a big array for csums instead of storing them in the io_tree like we do for buffered reads because previously we were locking the entire range, so we didn't have an extent state for each sector of the range. But now that we do the range locking as we map the buffers we can limit the mapping lenght to sectorsize and use the private part of the io_tree for our csums. This allows us to avoid an extra memory allocation for direct reads which could incur latency. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file-item.c | 4 ++-- fs/btrfs/inode.c | 45 ++++++++++++++++----------------------------- 3 files changed, 19 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index adb1cd7ceb9b..348196350bf0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3192,7 +3192,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 logical_offset, u32 *dst); + struct bio *bio, u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b45b9de0c21d..857d93cd01dc 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, } int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 offset, u32 *dst) + struct bio *bio, u64 offset) { - return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); + return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 09182449cbdf..2d65c52b0944 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5847,15 +5847,18 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, int unlock_bits = EXTENT_LOCKED; int ret; - lockstart = start; - lockend = start + len - 1; if (create) { ret = btrfs_delalloc_reserve_space(inode, len); if (ret) return ret; unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; + } else { + len = min_t(u64, len, root->sectorsize); } + lockstart = start; + lockend = start + len - 1; + /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. @@ -6015,7 +6018,6 @@ struct btrfs_dio_private { u64 logical_offset; u64 disk_bytenr; u64 bytes; - u32 *csums; void *private; /* number of bios pending for this dio */ @@ -6035,7 +6037,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; u64 start; - u32 *private = dip->csums; start = dip->logical_offset; do { @@ -6043,8 +6044,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct page *page = bvec->bv_page; char *kaddr; u32 csum = ~(u32)0; + u64 private = ~(u32)0; unsigned long flags; + if (get_state_private(&BTRFS_I(inode)->io_tree, + start, &private)) + goto failed; local_irq_save(flags); kaddr = kmap_atomic(page); csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, @@ -6054,18 +6059,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) local_irq_restore(flags); flush_dcache_page(bvec->bv_page); - if (csum != *private) { + if (csum != private) { +failed: printk(KERN_ERR "btrfs csum failed ino %llu off" " %llu csum %u private %u\n", (unsigned long long)btrfs_ino(inode), (unsigned long long)start, - csum, *private); + csum, (unsigned)private); err = -EIO; } } start += bvec->bv_len; - private++; bvec++; } while (bvec <= bvec_end); @@ -6073,7 +6078,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) dip->logical_offset + dip->bytes - 1); bio->bi_private = dip->private; - kfree(dip->csums); kfree(dip); /* If we had a csum failure make sure to clear the uptodate flag */ @@ -6179,7 +6183,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, - u32 *csums, int async_submit) + int async_submit) { int write = rw & REQ_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -6212,8 +6216,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (ret) goto err; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums_dio(root, inode, bio, - file_offset, csums); + ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); if (ret) goto err; } @@ -6239,10 +6242,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, u64 submit_len = 0; u64 map_length; int nr_pages = 0; - u32 *csums = dip->csums; int ret = 0; int async_submit = 0; - int write = rw & REQ_WRITE; map_length = orig_bio->bi_size; ret = btrfs_map_block(map_tree, READ, start_sector << 9, @@ -6278,16 +6279,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, atomic_inc(&dip->pending_bios); ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums, async_submit); + async_submit); if (ret) { bio_put(bio); atomic_dec(&dip->pending_bios); goto out_err; } - /* Write's use the ordered csums */ - if (!write && !skip_sum) - csums = csums + nr_pages; start_sector += submit_len >> 9; file_offset += submit_len; @@ -6317,7 +6315,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, submit: ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums, async_submit); + async_submit); if (!ret) return 0; @@ -6353,17 +6351,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ret = -ENOMEM; goto free_ordered; } - dip->csums = NULL; - - /* Write's use the ordered csum stuff, so we don't need dip->csums */ - if (!write && !skip_sum) { - dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); - if (!dip->csums) { - kfree(dip); - ret = -ENOMEM; - goto free_ordered; - } - } dip->private = bio->bi_private; dip->inode = inode; -- cgit v1.2.3 From 6209526531e70c080f79318ab8f50e26846c40a8 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Sat, 4 Aug 2012 01:45:02 -0600 Subject: btrfs: fix second lock in btrfs_delete_delayed_items() Fix a real bug caught by coccinelle. fs/btrfs/delayed-inode.c:1013:1-11: second lock on line 1013 Signed-off-by: Fengguang Wu --- fs/btrfs/delayed-inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 335605c8ceab..00deed4ef3ed 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1028,9 +1028,10 @@ do_again: btrfs_release_delayed_item(prev); ret = 0; btrfs_release_path(path); - if (curr) + if (curr) { + mutex_unlock(&node->mutex); goto do_again; - else + } else goto delete_fail; } -- cgit v1.2.3 From 1fa11e265fa2562fb713171b6a58e72bb7afd276 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 6 Aug 2012 14:18:51 -0600 Subject: Btrfs: fix deadlock in wait_for_more_refs Commit a168650c introduced a waiting mechanism to prevent busy waiting in btrfs_run_delayed_refs. This can deadlock with btrfs_run_ordered_operations, where a tree_mod_seq is held while waiting for the io to complete, while the end_io calls btrfs_run_delayed_refs. This whole mechanism is unnecessary. If not enough runnable refs are available to satisfy count, just return as count is more like a guideline than a strict requirement. In case we have to run all refs, commit transaction makes sure that no other threads are working in the transaction anymore, so we just assert here that no refs are blocked. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 6 ---- fs/btrfs/ctree.h | 1 - fs/btrfs/delayed-ref.c | 8 ------ fs/btrfs/disk-io.c | 2 -- fs/btrfs/extent-tree.c | 77 ++++++++++++++------------------------------------ 5 files changed, 21 insertions(+), 73 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9d7621f271ff..08e0b11ba0a1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -420,12 +420,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, } spin_unlock(&fs_info->tree_mod_seq_lock); - /* - * we removed the lowest blocker from the blocker list, so there may be - * more processible delayed refs. - */ - wake_up(&fs_info->tree_mod_seq_wait); - /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 348196350bf0..c38734a07a65 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1252,7 +1252,6 @@ struct btrfs_fs_info { atomic_t tree_mod_seq; struct list_head tree_mod_seq_list; struct seq_list tree_mod_seq_elem; - wait_queue_head_t tree_mod_seq_wait; /* this protects tree_mod_log */ rwlock_t tree_mod_log_lock; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index da7419ed01bb..7561431af50d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -662,9 +662,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, level, action, for_cow); - if (!need_ref_seq(for_cow, ref_root) && - waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); if (need_ref_seq(for_cow, ref_root)) btrfs_qgroup_record_ref(trans, &ref->node, extent_op); @@ -713,9 +710,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action, for_cow); - if (!need_ref_seq(for_cow, ref_root) && - waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); if (need_ref_seq(for_cow, ref_root)) btrfs_qgroup_record_ref(trans, &ref->node, extent_op); @@ -744,8 +738,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, num_bytes, BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data); - if (waitqueue_active(&fs_info->tree_mod_seq_wait)) - wake_up(&fs_info->tree_mod_seq_wait); spin_unlock(&delayed_refs->lock); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 502b20c56e84..a7ad8fc8dc53 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2035,8 +2035,6 @@ int open_ctree(struct super_block *sb, fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; - init_waitqueue_head(&fs_info->tree_mod_seq_wait); - /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); spin_lock_init(&fs_info->reada_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 45c69c4184c9..d3df65f83b5c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2318,12 +2318,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); delayed_refs->num_entries--; - /* - * we modified num_entries, but as we're currently running - * delayed refs, skip - * wake_up(&delayed_refs->seq_wait); - * here. - */ spin_unlock(&delayed_refs->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, @@ -2350,22 +2344,6 @@ next: return count; } -static void wait_for_more_refs(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - unsigned long num_refs, - struct list_head *first_seq) -{ - spin_unlock(&delayed_refs->lock); - pr_debug("waiting for more refs (num %ld, first %p)\n", - num_refs, first_seq); - wait_event(fs_info->tree_mod_seq_wait, - num_refs != delayed_refs->num_entries || - fs_info->tree_mod_seq_list.next != first_seq); - pr_debug("done waiting for more refs (num %ld, first %p)\n", - delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); - spin_lock(&delayed_refs->lock); -} - #ifdef SCRAMBLE_DELAYED_REFS /* * Normally delayed refs get processed in ascending bytenr order. This @@ -2460,13 +2438,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; struct list_head cluster; - struct list_head *first_seq = NULL; int ret; u64 delayed_start; int run_all = count == (unsigned long)-1; int run_most = 0; - unsigned long num_refs = 0; - int consider_waiting; + int loops; /* We'll clean this up in btrfs_cleanup_transaction */ if (trans->aborted) @@ -2484,7 +2460,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, delayed_refs = &trans->transaction->delayed_refs; INIT_LIST_HEAD(&cluster); again: - consider_waiting = 0; + loops = 0; spin_lock(&delayed_refs->lock); #ifdef SCRAMBLE_DELAYED_REFS @@ -2512,31 +2488,6 @@ again: if (ret) break; - if (delayed_start >= delayed_refs->run_delayed_start) { - if (consider_waiting == 0) { - /* - * btrfs_find_ref_cluster looped. let's do one - * more cycle. if we don't run any delayed ref - * during that cycle (because we can't because - * all of them are blocked) and if the number of - * refs doesn't change, we avoid busy waiting. - */ - consider_waiting = 1; - num_refs = delayed_refs->num_entries; - first_seq = root->fs_info->tree_mod_seq_list.next; - } else { - wait_for_more_refs(root->fs_info, delayed_refs, - num_refs, first_seq); - /* - * after waiting, things have changed. we - * dropped the lock and someone else might have - * run some refs, built new clusters and so on. - * therefore, we restart staleness detection. - */ - consider_waiting = 0; - } - } - ret = run_clustered_refs(trans, root, &cluster); if (ret < 0) { spin_unlock(&delayed_refs->lock); @@ -2549,9 +2500,26 @@ again: if (count == 0) break; - if (ret || delayed_refs->run_delayed_start == 0) { + if (delayed_start >= delayed_refs->run_delayed_start) { + if (loops == 0) { + /* + * btrfs_find_ref_cluster looped. let's do one + * more cycle. if we don't run any delayed ref + * during that cycle (because we can't because + * all of them are blocked), bail out. + */ + loops = 1; + } else { + /* + * no runnable refs left, stop trying + */ + BUG_ON(run_all); + break; + } + } + if (ret) { /* refs were run, let's reset staleness detection */ - consider_waiting = 0; + loops = 0; } } @@ -5296,9 +5264,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, rb_erase(&head->node.rb_node, &delayed_refs->root); delayed_refs->num_entries--; - smp_mb(); - if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) - wake_up(&root->fs_info->tree_mod_seq_wait); /* * we don't take a ref on the node because we're removing it from the -- cgit v1.2.3 From 66657b318e0e443ada229fccd40c8be86cfebdbf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Aug 2012 15:36:24 -0400 Subject: Btrfs: barrier before waitqueue_active We need a barrir before calling waitqueue_active otherwise we will miss wakeups. So in places that do atomic_dec(); then atomic_read() use atomic_dec_return() which imply a memory barrier (see memory-barriers.txt) and then add an explicit memory barrier everywhere else that need them. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/compression.c | 1 + fs/btrfs/delayed-inode.c | 7 +++---- fs/btrfs/disk-io.c | 7 ++++--- fs/btrfs/inode.c | 4 +--- fs/btrfs/volumes.c | 3 +-- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 86eff48dab78..43d1c5a3a030 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace) btrfs_compress_op[idx]->free_workspace(workspace); atomic_dec(alloc_workspace); wake: + smp_mb(); if (waitqueue_active(workspace_wait)) wake_up(workspace_wait); } diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 00deed4ef3ed..07d5eeb1e6f1 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) rb_erase(&delayed_item->rb_node, root); delayed_item->delayed_node->count--; - atomic_dec(&delayed_root->items); - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && + if (atomic_dec_return(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND && waitqueue_active(&delayed_root->wait)) wake_up(&delayed_root->wait); } @@ -1056,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) delayed_node->count--; delayed_root = delayed_node->root->fs_info->delayed_root; - atomic_dec(&delayed_root->items); - if (atomic_read(&delayed_root->items) < + if (atomic_dec_return(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && waitqueue_active(&delayed_root->wait)) wake_up(&delayed_root->wait); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a7ad8fc8dc53..dd86a5d88428 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -754,9 +754,7 @@ static void run_one_async_done(struct btrfs_work *work) limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; - atomic_dec(&fs_info->nr_async_submits); - - if (atomic_read(&fs_info->nr_async_submits) < limit && + if (atomic_dec_return(&fs_info->nr_async_submits) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); @@ -3783,14 +3781,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) /* FIXME: cleanup wait for commit */ t->in_commit = 1; t->blocked = 1; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) wake_up(&root->fs_info->transaction_blocked_wait); t->blocked = 0; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_wait)) wake_up(&root->fs_info->transaction_wait); t->commit_done = 1; + smp_mb(); if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2d65c52b0944..97baf00b40d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1007,9 +1007,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; - atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); - - if (atomic_read(&root->fs_info->async_delalloc_pages) < + if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) < 5 * 1024 * 1024 && waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3b394503bd4e..0b1e69d380dd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -227,9 +227,8 @@ loop_lock: cur = pending; pending = pending->bi_next; cur->bi_next = NULL; - atomic_dec(&fs_info->nr_async_bios); - if (atomic_read(&fs_info->nr_async_bios) < limit && + if (atomic_dec_return(&fs_info->nr_async_bios) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); -- cgit v1.2.3 From 6fc823b10f333313deb0b5d9069cbfd3a3f99f3a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 6 Aug 2012 13:46:38 -0600 Subject: Btrfs: increase the size of the free space cache Arne was complaining about the space cache having mismatching generation numbers when debugging a deadlock. This is because we can run out of space in our preallocated range for our space cache if you have a pretty fragmented amount of space in your pinned space. So just increase the amount of space we preallocate for space cache so we can be sure to have enough space. This will only really affect data ranges since their the only chunks that end up larger than 256MB. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d3df65f83b5c..1bb408f737fb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2975,17 +2975,16 @@ again: } spin_unlock(&block_group->lock); - num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); + /* + * Try to preallocate enough space based on how big the block group is. + * Keep in mind this has to include any pinned space which could end up + * taking up quite a bit since it's not folded into the other space + * cache. + */ + num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024); if (!num_pages) num_pages = 1; - /* - * Just to make absolutely sure we have enough space, we're going to - * preallocate 12 pages worth of space for each block group. In - * practice we ought to use at most 8, but we need extra space so we can - * add our header and have a terminator between the extents and the - * bitmaps. - */ num_pages *= 16; num_pages *= PAGE_CACHE_SIZE; -- cgit v1.2.3 From b12a3b1ea209d9dec02731fba58c3dbe7d31cfd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2012 15:34:49 -0400 Subject: Btrfs: don't run __tree_mod_log_free_eb on leaves When we split a leaf, we may end up inserting a new root on top of that leaf. The reflog code was incorrectly assuming the old root was always a node. This makes sure we skip over leaves. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 08e0b11ba0a1..6d183f60d63a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -625,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) u32 nritems; int ret; + if (btrfs_header_level(eb) == 0) + return; + nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { ret = tree_mod_log_insert_key_locked(fs_info, eb, i, -- cgit v1.2.3 From 22cd2e7de7b0bd68fb668d23e1564707ca689510 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Thu, 9 Aug 2012 00:16:53 -0600 Subject: Btrfs: fix race in run_clustered_refs With commit commit d1270cd91f308c9d22b2804720c36ccd32dbc35e Author: Arne Jansen Date: Tue Sep 13 15:16:43 2011 +0200 Btrfs: put back delayed refs that are too new I added a window where the delayed_ref's head->ref_mod code can diverge from the sum of the remaining refs, because we release the head->mutex in the middle. This leads to btrfs_lookup_extent_info returning wrong numbers. This patch fixes this by adjusting the head's ref_mod with each delayed ref we run. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1bb408f737fb..f16411d3c252 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2318,6 +2318,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); delayed_refs->num_entries--; + if (locked_ref) { + /* + * when we play the delayed ref, also correct the + * ref_mod on head + */ + switch (ref->action) { + case BTRFS_ADD_DELAYED_REF: + case BTRFS_ADD_DELAYED_EXTENT: + locked_ref->node.ref_mod -= ref->ref_mod; + break; + case BTRFS_DROP_DELAYED_REF: + locked_ref->node.ref_mod += ref->ref_mod; + break; + default: + WARN_ON(1); + } + } spin_unlock(&delayed_refs->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, -- cgit v1.2.3 From c0f62dedd04ae0f3b8a18079db5a015af24e416f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 8 Aug 2012 21:39:36 -0600 Subject: Btrfs: fix wrong mtime and ctime when creating snapshots When we created a new snapshot, the mtime and ctime of its parent directory were not updated. Fix it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7208ada41e0e..3ee8d58e97ad 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1026,6 +1026,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); + parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, parent_root, parent_inode); if (ret) goto abort_trans_dput; -- cgit v1.2.3 From 5a24e84c55f57cc49bd1cab531b6ef28b6b7bdaa Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 8 Aug 2012 10:12:59 -0600 Subject: Btrfs: fix enospc problems when deleting a subvol Subvol delete is a special kind of awful where we use the global reserve to cover the ENOSPC requirements. The problem is once we're done removing everything we do a btrfs_update_inode(), which by default will try to do the delayed update stuff which will use it's own reserve. There will be no space in this reserve and we'll return ENOSPC. So instead use btrfs_update_inode_fallback() which will just fallback to updating the inode item in the case of enospc. This is fine because the global reserve covers the space requirements for this. With this patch I can now delete a subvol on a problem image Dave Sterba sent me. Thanks, Reported-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 97baf00b40d1..0808f483dafa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3171,7 +3171,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode_inc_iversion(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, dir); + ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, root, ret); out: -- cgit v1.2.3 From ae1e206b806ccc490dadff59af8a7a2477b32884 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Aug 2012 16:00:32 -0400 Subject: Btrfs: allow delayed refs to be merged Daniel Blueman reported a bug with fio+balance on a ramdisk setup. Basically what happens is the balance relocates a tree block which will drop the implicit refs for all of its children and adds a full backref. Once the block is relocated we have to add the implicit refs back, so when we cow the block again we add the implicit refs for its children back. The problem comes when the original drop ref doesn't get run before we add the implicit refs back. The delayed ref stuff will specifically prefer ADD operations over DROP to keep us from freeing up an extent that will have references to it, so we try to add the implicit ref before it is actually removed and we panic. This worked fine before because the add would have just canceled the drop out and we would have been fine. But the backref walking work needs to be able to freeze the delayed ref stuff in time so we have this ever increasing sequence number that gets attached to all new delayed ref updates which makes us not merge refs and we run into this issue. So to fix this we need to merge delayed refs. So everytime we run a clustered ref we need to try and merge all of its delayed refs. The backref walking stuff locks the delayed ref head before processing, so if we have it locked we are safe to merge any refs inside of the sequence number. If there is no sequence number we can merge all refs. Doing this not only fixes our bug but keeps the delayed ref code from adding and removing useless refs and batching together multiple refs into one search instead of one search per delayed ref, which will really help our commit times. I ran this with Daniels test and 276 and I haven't seen any problems. Thanks, Reported-by: Daniel J Blueman Signed-off-by: Josef Bacik --- fs/btrfs/delayed-ref.c | 155 ++++++++++++++++++++++++++++++++++++++++--------- fs/btrfs/delayed-ref.h | 4 ++ fs/btrfs/extent-tree.c | 10 ++++ 3 files changed, 142 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 7561431af50d..ae9411773397 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -38,17 +38,14 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, struct btrfs_delayed_tree_ref *ref1) { - if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { - if (ref1->root < ref2->root) - return -1; - if (ref1->root > ref2->root) - return 1; - } else { - if (ref1->parent < ref2->parent) - return -1; - if (ref1->parent > ref2->parent) - return 1; - } + if (ref1->root < ref2->root) + return -1; + if (ref1->root > ref2->root) + return 1; + if (ref1->parent < ref2->parent) + return -1; + if (ref1->parent > ref2->parent) + return 1; return 0; } @@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, * type of the delayed backrefs and content of delayed backrefs. */ static int comp_entry(struct btrfs_delayed_ref_node *ref2, - struct btrfs_delayed_ref_node *ref1) + struct btrfs_delayed_ref_node *ref1, + bool compare_seq) { if (ref1->bytenr < ref2->bytenr) return -1; @@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, if (ref1->type > ref2->type) return 1; /* merging of sequenced refs is not allowed */ - if (ref1->seq < ref2->seq) - return -1; - if (ref1->seq > ref2->seq) - return 1; + if (compare_seq) { + if (ref1->seq < ref2->seq) + return -1; + if (ref1->seq > ref2->seq) + return 1; + } if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), @@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, rb_node); - cmp = comp_entry(entry, ins); + cmp = comp_entry(entry, ins, 1); if (cmp < 0) p = &(*p)->rb_left; else if (cmp > 0) @@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } +static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *ref) +{ + rb_erase(&ref->rb_node, &delayed_refs->root); + ref->in_tree = 0; + btrfs_put_delayed_ref(ref); + delayed_refs->num_entries--; + if (trans->delayed_ref_updates) + trans->delayed_ref_updates--; +} + +static int merge_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *ref, u64 seq) +{ + struct rb_node *node; + int merged = 0; + int mod = 0; + int done = 0; + + node = rb_prev(&ref->rb_node); + while (node) { + struct btrfs_delayed_ref_node *next; + + next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_prev(node); + if (next->bytenr != ref->bytenr) + break; + if (seq && next->seq >= seq) + break; + if (comp_entry(ref, next, 0)) + continue; + + if (ref->action == next->action) { + mod = next->ref_mod; + } else { + if (ref->ref_mod < next->ref_mod) { + struct btrfs_delayed_ref_node *tmp; + + tmp = ref; + ref = next; + next = tmp; + done = 1; + } + mod = -next->ref_mod; + } + + merged++; + drop_delayed_ref(trans, delayed_refs, next); + ref->ref_mod += mod; + if (ref->ref_mod == 0) { + drop_delayed_ref(trans, delayed_refs, ref); + break; + } else { + /* + * You can't have multiples of the same ref on a tree + * block. + */ + WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY); + } + + if (done) + break; + node = rb_prev(&ref->rb_node); + } + + return merged; +} + +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) +{ + struct rb_node *node; + u64 seq = 0; + + spin_lock(&fs_info->tree_mod_seq_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + struct seq_list *elem; + + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); + seq = elem->seq; + } + spin_unlock(&fs_info->tree_mod_seq_lock); + + node = rb_prev(&head->node.rb_node); + while (node) { + struct btrfs_delayed_ref_node *ref; + + ref = rb_entry(node, struct btrfs_delayed_ref_node, + rb_node); + if (ref->bytenr != head->node.bytenr) + break; + + /* We can't merge refs that are outside of our seq count */ + if (seq && ref->seq >= seq) + break; + if (merge_ref(trans, delayed_refs, ref, seq)) + node = rb_prev(&head->node.rb_node); + else + node = rb_prev(node); + } +} + int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, u64 seq) @@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans, * every changing the extent allocation tree. */ existing->ref_mod--; - if (existing->ref_mod == 0) { - rb_erase(&existing->rb_node, - &delayed_refs->root); - existing->in_tree = 0; - btrfs_put_delayed_ref(existing); - delayed_refs->num_entries--; - if (trans->delayed_ref_updates) - trans->delayed_ref_updates--; - } else { + if (existing->ref_mod == 0) + drop_delayed_ref(trans, delayed_refs, existing); + else WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || existing->type == BTRFS_SHARED_BLOCK_REF_KEY); - } } else { WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || existing->type == BTRFS_SHARED_BLOCK_REF_KEY); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 0d7c90c366b6..ab5300595847 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op); +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); struct btrfs_delayed_ref_head * btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f16411d3c252..ba58024d40d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2251,6 +2251,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } } + /* + * We need to try and merge add/drops of the same ref since we + * can run into issues with relocate dropping the implicit ref + * and then it being added back again before the drop can + * finish. If we merged anything we need to re-loop so we can + * get a good ref. + */ + btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, + locked_ref); + /* * locked_ref is the head node, so we have to go one * node back for any delayed ref updates -- cgit v1.2.3 From 68ce9682a4bb95d6be5529cb57214bf2a1b7d20e Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 1 Aug 2012 05:45:52 -0600 Subject: Btrfs: remove superblock writing after fatal error With commit acce952b0, btrfs was changed to flag the filesystem with BTRFS_SUPER_FLAG_ERROR and switch to read-only mode after a fatal error happened like a write I/O errors of all mirrors. In such situations, on unmount, the superblock is written in btrfs_error_commit_super(). This is done with the intention to be able to evaluate the error flag on the next mount. A warning is printed in this case during the next mount and the log tree is ignored. The issue is that it is possible that the superblock points to a root that was not written (due to write I/O errors). The result is that the filesystem cannot be mounted. btrfsck also does not start and all the other btrfs-progs tools fail to start as well. However, mount -o recovery is working well and does the right things to recover the filesystem (i.e., don't use the log root, clear the free space cache and use the next mountable root that is stored in the root backup array). This patch removes the writing of the superblock when BTRFS_SUPER_FLAG_ERROR is set, and removes the handling of the error flag in the mount function. These lines can be used to reproduce the issue (using /dev/sdm): SCRATCH_DEV=/dev/sdm SCRATCH_MNT=/mnt echo 0 25165824 linear $SCRATCH_DEV 0 | dmsetup create foo ls -alLF /dev/mapper/foo mkfs.btrfs /dev/mapper/foo mount /dev/mapper/foo $SCRATCH_MNT echo bar > $SCRATCH_MNT/foo sync echo 0 25165824 error | dmsetup reload foo dmsetup resume foo ls -alF $SCRATCH_MNT touch $SCRATCH_MNT/1 ls -alF $SCRATCH_MNT sleep 35 echo 0 25165824 linear $SCRATCH_DEV 0 | dmsetup reload foo dmsetup resume foo sleep 1 umount $SCRATCH_MNT btrfsck /dev/mapper/foo dmsetup remove foo Signed-off-by: Stefan Behrens Signed-off-by: Jan Schmidt --- fs/btrfs/disk-io.c | 36 ++++-------------------------------- fs/btrfs/disk-io.h | 2 +- 2 files changed, 5 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dd86a5d88428..3c4c4397f470 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2527,8 +2527,7 @@ retry_root_backup: goto fail_trans_kthread; /* do not make disk changes in broken FS */ - if (btrfs_super_log_root(disk_super) != 0 && - !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { + if (btrfs_super_log_root(disk_super) != 0) { u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { @@ -3188,30 +3187,14 @@ int close_ctree(struct btrfs_root *root) /* clear out the rbtree of defraggable inodes */ btrfs_run_defrag_inodes(fs_info); - /* - * Here come 2 situations when btrfs is broken to flip readonly: - * - * 1. when btrfs flips readonly somewhere else before - * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, - * and btrfs will skip to write sb directly to keep - * ERROR state on disk. - * - * 2. when btrfs flips readonly just in btrfs_commit_super, - * and in such case, btrfs cannot write sb via btrfs_commit_super, - * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, - * btrfs will cleanup all FS resources first and write sb then. - */ if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); if (ret) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - ret = btrfs_error_commit_super(root); - if (ret) - printk(KERN_ERR "btrfs: commit super ret %d\n", ret); - } + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + btrfs_error_commit_super(root); btrfs_put_block_group_cache(fs_info); @@ -3433,18 +3416,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, if (read_only) return 0; - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - printk(KERN_WARNING "warning: mount fs with errors, " - "running btrfsck is recommended\n"); - } - return 0; } -int btrfs_error_commit_super(struct btrfs_root *root) +void btrfs_error_commit_super(struct btrfs_root *root) { - int ret; - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -3454,10 +3430,6 @@ int btrfs_error_commit_super(struct btrfs_root *root) /* cleanup FS via transaction */ btrfs_cleanup_transaction(root); - - ret = write_ctree_super(NULL, root, 0); - - return ret; } static void btrfs_destroy_ordered_operations(struct btrfs_root *root) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 95e147eea239..c5b00a735fef 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); -int btrfs_error_commit_super(struct btrfs_root *root); +void btrfs_error_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, -- cgit v1.2.3 From 5ee0844d6427e7338e0aba748f62b62d07ea2ed0 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 27 Aug 2012 08:30:03 -0600 Subject: Btrfs: revert checksum error statistic which can cause a BUG() Commit 442a4f6308e694e0fa6025708bd5e4e424bbf51c added btrfs device statistic counters for detected IO and checksum errors to Linux 3.5. The statistic part that counts checksum errors in end_bio_extent_readpage() can cause a BUG() in a subfunction: "kernel BUG at fs/btrfs/volumes.c:3762!" That part is reverted with the current patch. However, the counting of checksum errors in the scrub context remains active, and the counting of detected IO errors (read, write or flush errors) in all contexts remains active. Cc: stable # 3.5 Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 17 ++--------------- fs/btrfs/volumes.c | 22 ---------------------- fs/btrfs/volumes.h | 2 -- 3 files changed, 2 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e7c9ed6505b..49085f2336d2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2329,23 +2329,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, state, mirror); - if (ret) { - /* no IO indicated but software detected errors - * in the block, either checksum errors or - * issues with the contents */ - struct btrfs_root *root = - BTRFS_I(page->mapping->host)->root; - struct btrfs_device *device; - + if (ret) uptodate = 0; - device = btrfs_find_device_for_logical( - root, start, mirror); - if (device) - btrfs_dev_stat_inc_and_print(device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - } else { + else clean_io_failure(start, page); - } } if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0b1e69d380dd..3f4e70e171ed 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4610,28 +4610,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) return ret; } -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num) -{ - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - int ret; - u64 map_length = 0; - struct btrfs_bio *bbio = NULL; - struct btrfs_device *device; - - BUG_ON(mirror_num == 0); - ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, - mirror_num); - if (ret) { - BUG_ON(bbio != NULL); - return NULL; - } - BUG_ON(mirror_num != bbio->mirror_num); - device = bbio->stripes[mirror_num - 1].dev; - kfree(bbio); - return device; -} - int btrfs_read_chunk_tree(struct btrfs_root *root) { struct btrfs_path *path; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5479325987b3..53c06af92e8d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num); void btrfs_dev_stat_print_on_error(struct btrfs_device *device); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_root *root, -- cgit v1.2.3 From bd7de2c9a449e26a5493d918618eb20ae60d56bd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Aug 2012 12:53:03 -0600 Subject: Btrfs: fix deadlock with freeze and sync V2 We can deadlock with freeze right now because we unconditionally start a transaction in our ->sync_fs() call. To fix this just check and see if we have a running transaction to commit. This saves us from the deadlock because at this point we'll have the umount sem for the sb so we're safe from freezes coming in after we've done our check. With this patch the freeze xfstests no longer deadlocks. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/super.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e06f124f284..073c2368f459 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -813,7 +813,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; - int ret; trace_btrfs_sync_fs(wait); @@ -824,11 +823,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait) btrfs_wait_ordered_extents(root, 0, 0); - trans = btrfs_start_transaction(root, 0); + spin_lock(&fs_info->trans_lock); + if (!fs_info->running_transaction) { + spin_unlock(&fs_info->trans_lock); + return 0; + } + spin_unlock(&fs_info->trans_lock); + + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - return ret; + return btrfs_commit_transaction(trans, root); } static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) -- cgit v1.2.3 From 24c03fa5cf3d02c327cf9f2fc39f72664b1bd7e1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 22 Aug 2012 20:10:38 -0600 Subject: Btrfs: fix a dio write regression This bug is introduced by commit 3b8bde746f6f9bd36a9f05f5f3b6e334318176a9 (Btrfs: lock extents as we map them in DIO). In dio write, we should unlock the section which we didn't do IO on in case that we fall back to buffered write. But we need to not only unlock the section but also cleanup reserved space for the section. This bug was found while running xfstests 133, with this 133 no longer complains. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0808f483dafa..38cda78de5e4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5992,11 +5992,27 @@ unlock: * in the case of read we need to unlock only the end area that we * aren't using if there is any left over space. */ - if (lockstart < lockend) - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - unlock_bits, 1, 0, &cached_state, GFP_NOFS); - else + if (lockstart < lockend) { + if (create && len < lockend - lockstart) { + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockstart + len - 1, unlock_bits, 1, 0, + &cached_state, GFP_NOFS); + /* + * Beside unlock, we also need to cleanup reserved space + * for the left range by attaching EXTENT_DO_ACCOUNTING. + */ + clear_extent_bit(&BTRFS_I(inode)->io_tree, + lockstart + len, lockend, + unlock_bits | EXTENT_DO_ACCOUNTING, + 1, 0, NULL, GFP_NOFS); + } else { + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, unlock_bits, 1, 0, + &cached_state, GFP_NOFS); + } + } else { free_extent_state(cached_state); + } free_extent_map(em); -- cgit v1.2.3 From d280e5be940931c84bb2e9831ead9d02bc785484 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 21 Aug 2012 21:13:25 -0600 Subject: Btrfs: fix ordered extent leak when failing to start a transaction We cannot just return error before freeing ordered extent and releasing reserved space when we fail to start a transacion. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 38cda78de5e4..6ba80b902877 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1882,8 +1882,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) trans = btrfs_join_transaction_nolock(root); else trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) /* -ENOMEM or corruption */ -- cgit v1.2.3 From 256dd1bb3750ac5ad49b40887c1691788dc44b33 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Fri, 10 Aug 2012 08:58:21 -0600 Subject: Btrfs: fix that repair code is spuriously executed for transid failures If verify_parent_transid() fails for all mirrors, the current code calls repair_io_failure() anyway which means: - that the disk block is rewritten without repairing anything and - that a kernel log message is printed which misleadingly claims that a read error was corrected. This is an example: parent transid verify failed on 615015833600 wanted 110423 found 110424 parent transid verify failed on 615015833600 wanted 110423 found 110424 btrfs read error corrected: ino 1 off 615015833600 (dev /dev/...) It is wrong to ignore the results from verify_parent_transid() and to call repair_eb_io_failure() when the verification of the transids failed. This commit fixes the issue. Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3c4c4397f470..29c69e60d3b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ret = read_extent_buffer_pages(io_tree, eb, start, WAIT_COMPLETE, btree_get_extent, mirror_num); - if (!ret && !verify_parent_transid(io_tree, eb, + if (!ret) { + if (!verify_parent_transid(io_tree, eb, parent_transid, 0)) - break; + break; + else + ret = -EIO; + } /* * This buffer's crc is fine, but its contents are corrupted, so -- cgit v1.2.3 From 6fb8a90aa3f2319a25f3396b1e9273300f8903b8 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Fri, 10 Aug 2012 15:01:51 -0300 Subject: xfs: fix race while discarding buffers [V4] While xfs_buftarg_shrink() is freeing buffers from the dispose list (filled with buffers from lru list), there is a possibility to have xfs_buf_stale() racing with it, and removing buffers from dispose list before xfs_buftarg_shrink() does it. This happens because xfs_buftarg_shrink() handle the dispose list without locking and the test condition in xfs_buf_stale() checks for the buffer being in *any* list: if (!list_empty(&bp->b_lru)) If the buffer happens to be on dispose list, this causes the buffer counter of lru list (btp->bt_lru_nr) to be decremented twice (once in xfs_buftarg_shrink() and another in xfs_buf_stale()) causing a wrong account usage of the lru list. This may cause xfs_buftarg_shrink() to return a wrong value to the memory shrinker shrink_slab(), and such account error may also cause an underflowed value to be returned; since the counter is lower than the current number of items in the lru list, a decrement may happen when the counter is 0, causing an underflow on the counter. The fix uses a new flag field (and a new buffer flag) to serialize buffer handling during the shrink process. The new flag field has been designed to use btp->bt_lru_lock/unlock instead of xfs_buf_lock/unlock mechanism. dchinner, sandeen, aquini and aris also deserve credits for this. Signed-off-by: Carlos Maiolino Reviewed-by: Ben Myers Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 5 ++++- fs/xfs/xfs_buf.h | 41 ++++++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d7a9dd735e1e..933b7930b863 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -96,6 +96,7 @@ xfs_buf_lru_add( atomic_inc(&bp->b_hold); list_add_tail(&bp->b_lru, &btp->bt_lru); btp->bt_lru_nr++; + bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); } @@ -154,7 +155,8 @@ xfs_buf_stale( struct xfs_buftarg *btp = bp->b_target; spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { + if (!list_empty(&bp->b_lru) && + !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { list_del_init(&bp->b_lru); btp->bt_lru_nr--; atomic_dec(&bp->b_hold); @@ -1501,6 +1503,7 @@ xfs_buftarg_shrink( */ list_move(&bp->b_lru, &dispose); btp->bt_lru_nr--; + bp->b_lru_flags |= _XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d03b73b9604e..7c0b6a0a1557 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -38,27 +38,28 @@ typedef enum { XBRW_ZERO = 3, /* Zero target memory */ } xfs_buf_rw_t; -#define XBF_READ (1 << 0) /* buffer intended for reading from device */ -#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ -#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ -#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ /* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ +#define XBF_FUA (1 << 11)/* force cache write through mode */ +#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ /* flags used only as arguments to access routines */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ /* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ -#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1 << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ typedef unsigned int xfs_buf_flags_t; @@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t; { XBF_SYNCIO, "SYNCIO" }, \ { XBF_FUA, "FUA" }, \ { XBF_FLUSH, "FLUSH" }, \ - { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ + { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" } + { _XBF_COMPOUND, "COMPOUND" }, \ + { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } typedef struct xfs_buftarg { dev_t bt_dev; @@ -124,7 +126,12 @@ typedef struct xfs_buf { xfs_buf_flags_t b_flags; /* status flags */ struct semaphore b_sema; /* semaphore for lockables */ + /* + * concurrent access to b_lru and b_lru_flags are protected by + * bt_lru_lock and not by b_sema + */ struct list_head b_lru; /* lru list */ + xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ -- cgit v1.2.3 From bbd99797973f2cebd905bf6469ce08b531ab258f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Aug 2012 19:24:34 +0200 Subject: cuse: fix fuse_conn_kill() fuse_conn_kill() removed fc->entry, called fuse_ctl_remove_conn() and fuse_bdi_destroy(). None of which is appropriate for cuse cleanup. The fuse_ctl_remove_conn() decrements the nlink on the control filesystem, which is totally bogus. The others are harmless but unnecessary. So move these out from fuse_conn_kill() to fuse_put_super() where they belong. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ce0a2838ccd0..fca222dabe3c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -367,11 +367,6 @@ void fuse_conn_kill(struct fuse_conn *fc) wake_up_all(&fc->waitq); wake_up_all(&fc->blocked_waitq); wake_up_all(&fc->reserved_req_waitq); - mutex_lock(&fuse_mutex); - list_del(&fc->entry); - fuse_ctl_remove_conn(fc); - mutex_unlock(&fuse_mutex); - fuse_bdi_destroy(fc); } EXPORT_SYMBOL_GPL(fuse_conn_kill); @@ -380,7 +375,14 @@ static void fuse_put_super(struct super_block *sb) struct fuse_conn *fc = get_fuse_conn_super(sb); fuse_send_destroy(fc); + fuse_conn_kill(fc); + mutex_lock(&fuse_mutex); + list_del(&fc->entry); + fuse_ctl_remove_conn(fc); + mutex_unlock(&fuse_mutex); + fuse_bdi_destroy(fc); + fuse_conn_put(fc); } -- cgit v1.2.3 From 8d39d801d64658d7d69e4754f287a71e9f9bbcb8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Aug 2012 19:24:35 +0200 Subject: cuse: kill connection on initialization error Luca Risolia reported that a CUSE daemon will continue to run even if initialization of the emulated device failes for some reason (e.g. the device number is already registered by another driver). This patch disconnects the fuse device on error, which will make the userspace CUSE daemon exit, albeit without indication about what the problem was. Reported-by: Luca Risolia Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 3426521f3205..ee8d55042298 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -396,7 +396,7 @@ err_device: err_region: unregister_chrdev_region(devt, 1); err: - fc->conn_error = 1; + fuse_conn_kill(fc); goto out; } @@ -532,8 +532,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file) cdev_del(cc->cdev); } - /* kill connection and shutdown channel */ - fuse_conn_kill(&cc->fc); rc = fuse_dev_release(inode, file); /* puts the base reference */ return rc; -- cgit v1.2.3 From 8089ed792832e5d3dd219da435c5a08d969662b4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 22 Aug 2012 17:35:11 +0300 Subject: UBIFS: fix power cut emulation for mtdram The power cut emulation did not work correctly because we corrupted more than one max. I/O unit in the buffer and then wrote the entire buffer. This lead to recovery errors because UBIFS complained about corrupted free space. And this was easily reproducible on mtdram because max. write size is very small there (64 bytes), and we could easily have a 1KiB buffer, corrupt 128 bytes there, and then write the entire buffer. The fix is to corrupt max. write size bytes at most, and write only up to the last corrupted max. write size chunk, not the entire buffer. Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 1aaa9f519485..5dc993fdca87 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2646,20 +2646,18 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) return 1; } -static void cut_data(const void *buf, unsigned int len) +static int corrupt_data(const struct ubifs_info *c, const void *buf, + unsigned int len) { unsigned int from, to, i, ffs = chance(1, 2); unsigned char *p = (void *)buf; from = random32() % (len + 1); - if (chance(1, 2)) - to = random32() % (len - from + 1); - else - to = len; + /* Corruption may only span one max. write unit */ + to = min(len, ALIGN(from, c->max_write_size)); - if (from < to) - ubifs_warn("filled bytes %u-%u with %s", from, to - 1, - ffs ? "0xFFs" : "random data"); + ubifs_warn("filled bytes %u-%u with %s", from, to - 1, + ffs ? "0xFFs" : "random data"); if (ffs) for (i = from; i < to; i++) @@ -2667,6 +2665,8 @@ static void cut_data(const void *buf, unsigned int len) else for (i = from; i < to; i++) p[i] = random32() % 0x100; + + return to; } int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, @@ -2679,7 +2679,9 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, failing = power_cut_emulated(c, lnum, 1); if (failing) - cut_data(buf, len); + len = corrupt_data(c, buf, len); + ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", + len, lnum, offs); err = ubi_leb_write(c->ubi, lnum, buf, offs, len); if (err) return err; -- cgit v1.2.3 From e2441b43190efe0c1e156455b35397ac5efd2610 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 9 Aug 2012 19:43:14 +0200 Subject: UBIFS: remove __DATE__ and __TIME__ This tag is useless and it breaks automatic builds. It causes rebuilds for packages that depend on kernel for no real reason. Further, quoting Michal, who removed most of the users already: The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Signed-off-by: Jiri Slaby Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 71a197f0f93d..d31928975cc8 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1428,7 +1428,6 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_msg("reserved for root: %llu bytes (%llu KiB)", c->report_rp_size, c->report_rp_size >> 10); - dbg_msg("compiled on: " __DATE__ " at " __TIME__); dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); dbg_msg("max. write size: %d bytes", c->max_write_size); dbg_msg("LEB size: %d bytes (%d KiB)", -- cgit v1.2.3 From 43457c60c8314835412848a9df25d4ba2f49f0ed Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 27 Aug 2012 13:48:48 +0300 Subject: UBIFS: use __aligned() attribute .. instead of __attribute__((aligned())). Signed-off-by: Artem Bityutskiy --- fs/ubifs/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 8eda717cb99b..a054cafd614b 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -514,7 +514,7 @@ struct idx_node { struct list_head list; int iip; union ubifs_key upper_key; - struct ubifs_idx_node idx __attribute__((aligned(8))); + struct ubifs_idx_node idx __aligned(8); }; /** -- cgit v1.2.3 From 79fda5179a5227c930e5b0242b5d5ebf3df29422 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 27 Aug 2012 13:34:09 +0300 Subject: UBIFS: comply with coding style Join all the split printk lines in order to stop checkpatch complaining. Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 5 +- fs/ubifs/compress.c | 7 +-- fs/ubifs/debug.c | 157 ++++++++++++++++++++------------------------------ fs/ubifs/dir.c | 4 +- fs/ubifs/file.c | 4 +- fs/ubifs/gc.c | 6 +- fs/ubifs/log.c | 14 ++--- fs/ubifs/lprops.c | 58 +++++++++---------- fs/ubifs/lpt.c | 3 +- fs/ubifs/lpt_commit.c | 14 ++--- fs/ubifs/orphan.c | 7 +-- fs/ubifs/recovery.c | 11 ++-- fs/ubifs/replay.c | 16 ++--- fs/ubifs/sb.c | 19 +++--- fs/ubifs/scan.c | 7 ++- fs/ubifs/super.c | 35 +++++------ 16 files changed, 161 insertions(+), 206 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index bc4f94b28706..ea9c814c0a33 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -342,9 +342,8 @@ static int do_budget_space(struct ubifs_info *c) lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - c->lst.taken_empty_lebs; if (unlikely(rsvd_idx_lebs > lebs)) { - dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " - "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, - rsvd_idx_lebs); + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d", + min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs); return -ENOSPC; } diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 11e4132f314a..2bfa0953335d 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -112,8 +112,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, if (compr->comp_mutex) mutex_unlock(compr->comp_mutex); if (unlikely(err)) { - ubifs_warn("cannot compress %d bytes, compressor %s, " - "error %d, leave data uncompressed", + ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", in_len, compr->name, err); goto no_compr; } @@ -176,8 +175,8 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, if (compr->decomp_mutex) mutex_unlock(compr->decomp_mutex); if (err) - ubifs_err("cannot decompress %d bytes, compressor %s, " - "error %d", in_len, compr->name, err); + ubifs_err("cannot decompress %d bytes, compressor %s, error %d", + in_len, compr->name, err); return err; } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 5dc993fdca87..9f28375fc5b3 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -509,8 +509,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) printk(KERN_ERR "\tname "); if (nlen > UBIFS_MAX_NLEN) - printk(KERN_ERR "(bad name length, not printing, " - "bad or corrupted node)"); + printk(KERN_ERR "(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) printk(KERN_CONT "%c", dent->name[i]); @@ -618,14 +617,12 @@ void ubifs_dump_budget_req(const struct ubifs_budget_req *req) void ubifs_dump_lstats(const struct ubifs_lp_stats *lst) { spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, " - "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); - printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, " - "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, - lst->total_dirty); - printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, " - "total_dead %lld\n", lst->total_used, lst->total_dark, - lst->total_dead); + printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n", + current->pid, lst->empty_lebs, lst->idx_lebs); + printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n", + lst->taken_empty_lebs, lst->total_free, lst->total_dirty); + printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, total_dead %lld\n", + lst->total_used, lst->total_dark, lst->total_dead); spin_unlock(&dbg_lock); } @@ -639,16 +636,13 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) spin_lock(&c->space_lock); spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, " - "total budget sum %lld\n", current->pid, - bi->data_growth + bi->dd_growth, + printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n", + current->pid, bi->data_growth + bi->dd_growth, bi->data_growth + bi->dd_growth + bi->idx_growth); - printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, " - "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, - bi->idx_growth); - printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, " - "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, - bi->uncommitted_idx); + printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n", + bi->data_growth, bi->dd_growth, bi->idx_growth); + printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n", + bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx); printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n", bi->page_budget, bi->inode_budget, bi->dent_budget); printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", @@ -666,8 +660,8 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); - printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " - "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), + printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n", + atomic_long_read(&c->dirty_pg_cnt), atomic_long_read(&c->dirty_zn_cnt), atomic_long_read(&c->clean_zn_cnt)); printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", @@ -715,15 +709,13 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) dark = ubifs_calc_dark(c, spc); if (lp->flags & LPROPS_INDEX) - printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " - "free + dirty %-8d flags %#x (", lp->lnum, lp->free, - lp->dirty, c->leb_size - spc, spc, lp->flags); + printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (", + lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, + lp->flags); else - printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " - "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " - "flags %#-4x (", lp->lnum, lp->free, lp->dirty, - c->leb_size - spc, spc, dark, dead, - (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); + printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (", + lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, + dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); if (lp->flags & LPROPS_TAKEN) { if (lp->flags & LPROPS_INDEX) @@ -851,9 +843,9 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) printk(KERN_ERR "\tLPT lsave is at %d:%d\n", c->lsave_lnum, c->lsave_offs); for (i = 0; i < c->lpt_lebs; i++) - printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d " - "cmt %d\n", i + c->lpt_first, c->ltab[i].free, - c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); + printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n", + i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty, + c->ltab[i].tgc, c->ltab[i].cmt); spin_unlock(&dbg_lock); } @@ -867,8 +859,8 @@ void ubifs_dump_sleb(const struct ubifs_info *c, list_for_each_entry(snod, &sleb->nodes, list) { cond_resched(); - printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum, - snod->offs, snod->len); + printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", + sleb->lnum, snod->offs, snod->len); ubifs_dump_node(c, snod->node); } } @@ -926,10 +918,9 @@ void ubifs_dump_znode(const struct ubifs_info *c, else zbr = &c->zroot; - printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d" - " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, - zbr->len, znode->parent, znode->iip, znode->level, - znode->child_cnt, znode->flags); + printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n", + znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip, + znode->level, znode->child_cnt, znode->flags); if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { spin_unlock(&dbg_lock); @@ -940,19 +931,15 @@ void ubifs_dump_znode(const struct ubifs_info *c, for (n = 0; n < znode->child_cnt; n++) { zbr = &znode->zbranch[n]; if (znode->level > 0) - printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key " - "%s\n", n, zbr->znode, zbr->lnum, - zbr->offs, zbr->len, - dbg_snprintf_key(c, &zbr->key, - key_buf, - DBG_KEY_BUF_LEN)); + printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key %s\n", + n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, + dbg_snprintf_key(c, &zbr->key, key_buf, + DBG_KEY_BUF_LEN)); else - printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key " - "%s\n", n, zbr->znode, zbr->lnum, - zbr->offs, zbr->len, - dbg_snprintf_key(c, &zbr->key, - key_buf, - DBG_KEY_BUF_LEN)); + printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key %s\n", + n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, + dbg_snprintf_key(c, &zbr->key, key_buf, + DBG_KEY_BUF_LEN)); } spin_unlock(&dbg_lock); } @@ -966,9 +953,9 @@ void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; - printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d " - "flags %d\n", i, lprops->lnum, lprops->hpos, - lprops->free, lprops->dirty, lprops->flags); + printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d flags %d\n", + i, lprops->lnum, lprops->hpos, lprops->free, + lprops->dirty, lprops->flags); } printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); } @@ -1148,8 +1135,8 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) mutex_lock(&ui->ui_mutex); spin_lock(&ui->ui_lock); if (ui->ui_size != ui->synced_i_size && !ui->dirty) { - ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode " - "is clean", ui->ui_size, ui->synced_i_size); + ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean", + ui->ui_size, ui->synced_i_size); ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, inode->i_mode, i_size_read(inode)); dump_stack(); @@ -1211,17 +1198,16 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) kfree(pdent); if (i_size_read(dir) != size) { - ubifs_err("directory inode %lu has size %llu, " - "but calculated size is %llu", dir->i_ino, - (unsigned long long)i_size_read(dir), + ubifs_err("directory inode %lu has size %llu, but calculated size is %llu", + dir->i_ino, (unsigned long long)i_size_read(dir), (unsigned long long)size); ubifs_dump_inode(c, dir); dump_stack(); return -EINVAL; } if (dir->i_nlink != nlink) { - ubifs_err("directory inode %lu has nlink %u, but calculated " - "nlink is %u", dir->i_ino, dir->i_nlink, nlink); + ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u", + dir->i_ino, dir->i_nlink, nlink); ubifs_dump_inode(c, dir); dump_stack(); return -EINVAL; @@ -1680,8 +1666,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, if (znode_cb) { err = znode_cb(c, znode, priv); if (err) { - ubifs_err("znode checking function returned " - "error %d", err); + ubifs_err("znode checking function returned error %d", + err); ubifs_dump_znode(c, znode); goto out_dump; } @@ -1691,9 +1677,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, zbr = &znode->zbranch[idx]; err = leaf_cb(c, zbr, priv); if (err) { - ubifs_err("leaf checking function " - "returned error %d, for leaf " - "at LEB %d:%d", + ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d", err, zbr->lnum, zbr->offs); goto out_dump; } @@ -1801,8 +1785,8 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) } if (calc != idx_size) { - ubifs_err("index size check failed: calculated size is %lld, " - "should be %lld", calc, idx_size); + ubifs_err("index size check failed: calculated size is %lld, should be %lld", + calc, idx_size); dump_stack(); return -EINVAL; } @@ -2114,8 +2098,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki = read_add_inode(c, priv, inum); if (IS_ERR(fscki)) { err = PTR_ERR(fscki); - ubifs_err("error %d while processing data node and " - "trying to find inode node %lu", + ubifs_err("error %d while processing data node and trying to find inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2125,9 +2108,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, blk_offs <<= UBIFS_BLOCK_SHIFT; blk_offs += le32_to_cpu(dn->size); if (blk_offs > fscki->size) { - ubifs_err("data node at LEB %d:%d is not within inode " - "size %lld", zbr->lnum, zbr->offs, - fscki->size); + ubifs_err("data node at LEB %d:%d is not within inode size %lld", + zbr->lnum, zbr->offs, fscki->size); err = -EINVAL; goto out_dump; } @@ -2148,8 +2130,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki = read_add_inode(c, priv, inum); if (IS_ERR(fscki)) { err = PTR_ERR(fscki); - ubifs_err("error %d while processing entry node and " - "trying to find inode node %lu", + ubifs_err("error %d while processing entry node and trying to find inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2161,8 +2142,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki1 = read_add_inode(c, priv, inum); if (IS_ERR(fscki1)) { err = PTR_ERR(fscki1); - ubifs_err("error %d while processing entry node and " - "trying to find parent inode node %lu", + ubifs_err("error %d while processing entry node and trying to find parent inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2252,61 +2232,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) */ if (fscki->inum != UBIFS_ROOT_INO && fscki->references != 1) { - ubifs_err("directory inode %lu has %d " - "direntries which refer it, but " - "should be 1", + ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1", (unsigned long)fscki->inum, fscki->references); goto out_dump; } if (fscki->inum == UBIFS_ROOT_INO && fscki->references != 0) { - ubifs_err("root inode %lu has non-zero (%d) " - "direntries which refer it", + ubifs_err("root inode %lu has non-zero (%d) direntries which refer it", (unsigned long)fscki->inum, fscki->references); goto out_dump; } if (fscki->calc_sz != fscki->size) { - ubifs_err("directory inode %lu size is %lld, " - "but calculated size is %lld", + ubifs_err("directory inode %lu size is %lld, but calculated size is %lld", (unsigned long)fscki->inum, fscki->size, fscki->calc_sz); goto out_dump; } if (fscki->calc_cnt != fscki->nlink) { - ubifs_err("directory inode %lu nlink is %d, " - "but calculated nlink is %d", + ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d", (unsigned long)fscki->inum, fscki->nlink, fscki->calc_cnt); goto out_dump; } } else { if (fscki->references != fscki->nlink) { - ubifs_err("inode %lu nlink is %d, but " - "calculated nlink is %d", + ubifs_err("inode %lu nlink is %d, but calculated nlink is %d", (unsigned long)fscki->inum, fscki->nlink, fscki->references); goto out_dump; } } if (fscki->xattr_sz != fscki->calc_xsz) { - ubifs_err("inode %lu has xattr size %u, but " - "calculated size is %lld", + ubifs_err("inode %lu has xattr size %u, but calculated size is %lld", (unsigned long)fscki->inum, fscki->xattr_sz, fscki->calc_xsz); goto out_dump; } if (fscki->xattr_cnt != fscki->calc_xcnt) { - ubifs_err("inode %lu has %u xattrs, but " - "calculated count is %lld", + ubifs_err("inode %lu has %u xattrs, but calculated count is %lld", (unsigned long)fscki->inum, fscki->xattr_cnt, fscki->calc_xcnt); goto out_dump; } if (fscki->xattr_nms != fscki->calc_xnms) { - ubifs_err("inode %lu has xattr names' size %u, but " - "calculated names' size is %lld", + ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld", (unsigned long)fscki->inum, fscki->xattr_nms, fscki->calc_xnms); goto out_dump; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c95681cf1b71..e271fba1651b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -980,8 +980,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, * separately. */ - dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " - "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, + dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu", + old_dentry->d_name.len, old_dentry->d_name.name, old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, new_dentry->d_name.name, new_dir->i_ino); ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 7bd6e72afd11..ff48c5a85309 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1486,8 +1486,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, err = ubifs_budget_space(c, &req); if (unlikely(err)) { if (err == -ENOSPC) - ubifs_warn("out of space for mmapped file " - "(inode number %lu)", inode->i_ino); + ubifs_warn("out of space for mmapped file (inode number %lu)", + inode->i_ino); return VM_FAULT_SIGBUS; } diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 04dd6f47635e..76ca53cd3eee 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -714,9 +714,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) break; } - dbg_gc("found LEB %d: free %d, dirty %d, sum %d " - "(min. space %d)", lp.lnum, lp.free, lp.dirty, - lp.free + lp.dirty, min_space); + dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)", + lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty, + min_space); space_before = c->leb_size - wbuf->offs - wbuf->used; if (wbuf->lnum == -1) diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index c80b15d6c8de..36bd4efd0819 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -315,17 +315,15 @@ static void remove_buds(struct ubifs_info *c) * heads (non-closed buds). */ c->cmt_bud_bytes += wbuf->offs - bud->start; - dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " - "cmt_bud_bytes %lld", bud->lnum, bud->start, - dbg_jhead(bud->jhead), wbuf->offs - bud->start, - c->cmt_bud_bytes); + dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", + bud->lnum, bud->start, dbg_jhead(bud->jhead), + wbuf->offs - bud->start, c->cmt_bud_bytes); bud->start = wbuf->offs; } else { c->cmt_bud_bytes += c->leb_size - bud->start; - dbg_log("remove %d:%d, jhead %s, bud bytes %d, " - "cmt_bud_bytes %lld", bud->lnum, bud->start, - dbg_jhead(bud->jhead), c->leb_size - bud->start, - c->cmt_bud_bytes); + dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", + bud->lnum, bud->start, dbg_jhead(bud->jhead), + c->leb_size - bud->start, c->cmt_bud_bytes); rb_erase(p1, &c->buds); /* * If the commit does not finish, the recovery will need diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 86eb8e533249..132b8e3e7b0b 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -867,15 +867,15 @@ int dbg_check_cats(struct ubifs_info *c) list_for_each_entry(lprops, &c->empty_list, list) { if (lprops->free != c->leb_size) { - ubifs_err("non-empty LEB %d on empty list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on empty list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } } @@ -883,15 +883,15 @@ int dbg_check_cats(struct ubifs_info *c) i = 0; list_for_each_entry(lprops, &c->freeable_list, list) { if (lprops->free + lprops->dirty != c->leb_size) { - ubifs_err("non-freeable LEB %d on freeable list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on freeable list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } i += 1; @@ -913,21 +913,21 @@ int dbg_check_cats(struct ubifs_info *c) list_for_each_entry(lprops, &c->frdi_idx_list, list) { if (lprops->free + lprops->dirty != c->leb_size) { - ubifs_err("non-freeable LEB %d on frdi_idx list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on frdi_idx list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (!(lprops->flags & LPROPS_INDEX)) { - ubifs_err("non-index LEB %d on frdi_idx list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } } @@ -1153,8 +1153,8 @@ static int scan_check_cb(struct ubifs_info *c, if (free > c->leb_size || free < 0 || dirty > c->leb_size || dirty < 0) { - ubifs_err("bad calculated accounting for LEB %d: " - "free %d, dirty %d", lnum, free, dirty); + ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d", + lnum, free, dirty); goto out_destroy; } @@ -1200,8 +1200,7 @@ static int scan_check_cb(struct ubifs_info *c, /* Free but not unmapped LEB, it's fine */ is_idx = 0; else { - ubifs_err("indexing node without indexing " - "flag"); + ubifs_err("indexing node without indexing flag"); goto out_print; } } @@ -1236,8 +1235,7 @@ static int scan_check_cb(struct ubifs_info *c, return LPT_SCAN_CONTINUE; out_print: - ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " - "should be free %d, dirty %d", + ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d", lnum, lp->free, lp->dirty, lp->flags, free, dirty); ubifs_dump_leb(c, lnum); out_destroy: @@ -1290,12 +1288,10 @@ int dbg_check_lprops(struct ubifs_info *c) lst.total_dirty != c->lst.total_dirty || lst.total_used != c->lst.total_used) { ubifs_err("bad overall accounting"); - ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " - "total_free %lld, total_dirty %lld, total_used %lld", + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", lst.empty_lebs, lst.idx_lebs, lst.total_free, lst.total_dirty, lst.total_used); - ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " - "total_free %lld, total_dirty %lld, total_used %lld", + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, c->lst.total_dirty, c->lst.total_used); err = -EINVAL; diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 8640920766ed..c3e6bbf13ba5 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -2237,8 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, /* cnode is a nnode */ num = calc_nnode_num(row, col); if (cnode->num != num) { - ubifs_err("nnode num %d expected %d " - "parent num %d iip %d", + ubifs_err("nnode num %d expected %d parent num %d iip %d", cnode->num, num, (nnode ? nnode->num : 0), cnode->iip); return -EINVAL; diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 4fa70734e6e7..74082553e1fc 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -320,8 +320,8 @@ static int layout_cnodes(struct ubifs_info *c) return 0; no_space: - ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " - "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); + ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + lnum, offs, len, done_ltab, done_lsave); ubifs_dump_lpt_info(c); ubifs_dump_lpt_lebs(c); dump_stack(); @@ -545,8 +545,8 @@ static int write_cnodes(struct ubifs_info *c) return 0; no_space: - ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " - "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); + ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + lnum, offs, len, done_ltab, done_lsave); ubifs_dump_lpt_info(c); ubifs_dump_lpt_lebs(c); dump_stack(); @@ -1668,14 +1668,12 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) } i = lnum - c->lpt_first; if (len != c->ltab[i].free) { - dbg_msg("invalid free space in LEB %d " - "(free %d, expected %d)", + dbg_msg("invalid free space in LEB %d (free %d, expected %d)", lnum, len, c->ltab[i].free); err = -EINVAL; } if (dirty != c->ltab[i].dirty) { - dbg_msg("invalid dirty space in LEB %d " - "(dirty %d, expected %d)", + dbg_msg("invalid dirty space in LEB %d (dirty %d, expected %d)", lnum, dirty, c->ltab[i].dirty); err = -EINVAL; } diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index cebf17ea0458..769701ccb5c9 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -562,8 +562,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, list_for_each_entry(snod, &sleb->nodes, list) { if (snod->type != UBIFS_ORPH_NODE) { - ubifs_err("invalid node type %d in orphan area at " - "%d:%d", snod->type, sleb->lnum, snod->offs); + ubifs_err("invalid node type %d in orphan area at %d:%d", + snod->type, sleb->lnum, snod->offs); ubifs_dump_node(c, snod->node); return -EINVAL; } @@ -589,8 +589,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, * number. That makes this orphan node, out of date. */ if (!first) { - ubifs_err("out of order commit number %llu in " - "orphan node at %d:%d", + ubifs_err("out of order commit number %llu in orphan node at %d:%d", cmt_no, sleb->lnum, snod->offs); ubifs_dump_node(c, snod->node); return -EINVAL; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index edeec499c048..065096e36ed9 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -609,7 +609,8 @@ static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); - dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); + dbg_rcvry("dropping last node at %d:%d", + sleb->lnum, snod->offs); *offs = snod->offs; list_del(&snod->list); kfree(snod); @@ -702,8 +703,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * See header comment for this file for more * explanations about the reasons we have this check. */ - ubifs_err("corrupt empty space LEB %d:%d, corruption " - "starts at %d", lnum, offs, corruption); + ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d", + lnum, offs, corruption); /* Make sure we dump interesting non-0xFF data */ offs += corruption; buf += corruption; @@ -899,8 +900,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, } } if (snod->sqnum > cs_sqnum) { - ubifs_err("unrecoverable log corruption " - "in LEB %d", lnum); + ubifs_err("unrecoverable log corruption in LEB %d", + lnum); ubifs_scan_destroy(sleb); return ERR_PTR(-EUCLEAN); } diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 94d78fc5d4e0..ede6e5835ced 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -141,9 +141,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) * during the replay. */ if (dirty != 0) - dbg_msg("LEB %d lp: %d free %d dirty " - "replay: %d free %d dirty", b->bud->lnum, - lp->free, lp->dirty, b->free, b->dirty); + dbg_msg("LEB %d lp: %d free %d dirty replay: %d free %d dirty", + b->bud->lnum, lp->free, lp->dirty, b->free, + b->dirty); } lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, lp->flags | LPROPS_TAKEN, 0); @@ -677,7 +677,8 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) b->dirty = sleb->endpt - offs - used; b->free = c->leb_size - sleb->endpt; - dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); + dbg_mnt("bud LEB %d replied: dirty %d, free %d", + lnum, b->dirty, b->free); out: ubifs_scan_destroy(sleb); @@ -865,8 +866,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) goto out_dump; } if (le64_to_cpu(node->cmt_no) != c->cmt_no) { - ubifs_err("first CS node at LEB %d:%d has wrong " - "commit number %llu expected %llu", + ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu", lnum, offs, (unsigned long long)le64_to_cpu(node->cmt_no), c->cmt_no); @@ -1058,8 +1058,8 @@ int ubifs_replay_journal(struct ubifs_info *c) c->bi.uncommitted_idx *= c->max_idx_node_sz; ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); - dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " - "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", + c->lhead_lnum, c->lhead_offs, c->max_sqnum, (unsigned long)c->highest_inum); out: destroy_replay_list(c); diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 15e2fc5aa60b..5b7bfa29ec37 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -391,9 +391,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { - ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " - "%d minimum required", c->leb_cnt, c->vi.size, - min_leb_cnt); + ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required", + c->leb_cnt, c->vi.size, min_leb_cnt); goto failed; } @@ -411,15 +410,14 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; if (c->max_bud_bytes < max_bytes) { - ubifs_err("too small journal (%lld bytes), must be at least " - "%lld bytes", c->max_bud_bytes, max_bytes); + ubifs_err("too small journal (%lld bytes), must be at least %lld bytes", + c->max_bud_bytes, max_bytes); goto failed; } max_bytes = (long long)c->leb_size * c->main_lebs; if (c->max_bud_bytes > max_bytes) { - ubifs_err("too large journal size (%lld bytes), only %lld bytes" - "available in the main area", + ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area", c->max_bud_bytes, max_bytes); goto failed; } @@ -549,10 +547,9 @@ int ubifs_read_superblock(struct ubifs_info *c) ubifs_assert(!c->ro_media || c->ro_mount); if (!c->ro_mount || c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { - ubifs_err("on-flash format version is w%d/r%d, but " - "software only supports up to version " - "w%d/r%d", c->fmt_version, - c->ro_compat_version, UBIFS_FORMAT_VERSION, + ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { ubifs_msg("only R/O mounting is possible"); diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 019ca47a1f8e..58aa05df2bb6 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -85,7 +85,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (len < UBIFS_CH_SZ) return SCANNED_GARBAGE; - dbg_scan("scanning %s at LEB %d:%d", dbg_ntype(ch->node_type), lnum, offs); + dbg_scan("scanning %s at LEB %d:%d", + dbg_ntype(ch->node_type), lnum, offs); if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) return SCANNED_A_CORRUPT_NODE; @@ -150,8 +151,8 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); if (err && err != -EBADMSG) { - ubifs_err("cannot read %d bytes from LEB %d:%d," - " error %d", c->leb_size - offs, lnum, offs, err); + ubifs_err("cannot read %d bytes from LEB %d:%d, error %d", + c->leb_size - offs, lnum, offs, err); kfree(sleb); return ERR_PTR(err); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index d31928975cc8..448bf24ef8d0 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -89,9 +89,8 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) return 5; if (!ubifs_compr_present(ui->compr_type)) { - ubifs_warn("inode %lu uses '%s' compression, but it was not " - "compiled in", inode->i_ino, - ubifs_compr_name(ui->compr_type)); + ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in", + inode->i_ino, ubifs_compr_name(ui->compr_type)); } err = dbg_check_dir(c, inode); @@ -1061,8 +1060,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, flag = parse_standard_option(p); if (!flag) { - ubifs_err("unrecognized mount option \"%s\" " - "or missing value", p); + ubifs_err("unrecognized mount option \"%s\" or missing value", + p); return -EINVAL; } sb->s_flags |= flag; @@ -1124,8 +1123,8 @@ again: } /* Just disable bulk-read */ - ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, " - "disabling it", c->max_bu_buf_len); + ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it", + c->max_bu_buf_len); c->mount_opts.bulk_read = 1; c->bulk_read = 0; return; @@ -1416,11 +1415,11 @@ static int mount_ubifs(struct ubifs_info *c) if (c->ro_mount) ubifs_msg("mounted read-only"); x = (long long)c->main_lebs * c->leb_size; - ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " - "LEBs)", x, x >> 10, x >> 20, c->main_lebs); + ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", + x, x >> 10, x >> 20, c->main_lebs); x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; - ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " - "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); + ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", + x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", c->fmt_version, c->ro_compat_version, UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); @@ -1563,10 +1562,9 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (c->rw_incompat) { ubifs_err("the file-system is not R/W-compatible"); - ubifs_msg("on-flash format version is w%d/r%d, but software " - "only supports up to version w%d/r%d", c->fmt_version, - c->ro_compat_version, UBIFS_FORMAT_VERSION, - UBIFS_RO_COMPAT_VERSION); + ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); return -EROFS; } @@ -1827,8 +1825,8 @@ static void ubifs_put_super(struct super_block *sb) * next mount, so we just print a message and * continue to unmount normally. */ - ubifs_err("failed to write master node, " - "error %d", err); + ubifs_err("failed to write master node, error %d", + err); } else { for (i = 0; i < c->jhead_cnt; i++) /* Make sure write-buffer timers are canceled */ @@ -2247,8 +2245,7 @@ static int __init ubifs_init(void) * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. */ if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { - ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" - " at least 4096 bytes", + ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", (unsigned int)PAGE_CACHE_SIZE); return -EINVAL; } -- cgit v1.2.3 From 6b38d03f48da3006085e2d3e45168ed60475f7bb Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 27 Aug 2012 13:56:19 +0300 Subject: UBIFS: use pr_ helper instead of printk Use 'pr_err()' instead of 'printk(KERN_ERR', etc. Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 482 ++++++++++++++++++++++---------------------------- fs/ubifs/debug.h | 6 +- fs/ubifs/lpt_commit.c | 36 ++-- fs/ubifs/ubifs.h | 13 +- 4 files changed, 235 insertions(+), 302 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 9f28375fc5b3..2714e02093a4 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -219,15 +219,15 @@ const char *dbg_jhead(int jhead) static void dump_ch(const struct ubifs_ch *ch) { - printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic)); - printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc)); - printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type, + pr_err("\tmagic %#x\n", le32_to_cpu(ch->magic)); + pr_err("\tcrc %#x\n", le32_to_cpu(ch->crc)); + pr_err("\tnode_type %d (%s)\n", ch->node_type, dbg_ntype(ch->node_type)); - printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type, + pr_err("\tgroup_type %d (%s)\n", ch->group_type, dbg_gtype(ch->group_type)); - printk(KERN_ERR "\tsqnum %llu\n", + pr_err("\tsqnum %llu\n", (unsigned long long)le64_to_cpu(ch->sqnum)); - printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len)); + pr_err("\tlen %u\n", le32_to_cpu(ch->len)); } void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) @@ -238,43 +238,43 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) struct ubifs_dent_node *dent, *pdent = NULL; int count = 2; - printk(KERN_ERR "Dump in-memory inode:"); - printk(KERN_ERR "\tinode %lu\n", inode->i_ino); - printk(KERN_ERR "\tsize %llu\n", + pr_err("Dump in-memory inode:"); + pr_err("\tinode %lu\n", inode->i_ino); + pr_err("\tsize %llu\n", (unsigned long long)i_size_read(inode)); - printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); - printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); - printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); - printk(KERN_ERR "\tatime %u.%u\n", + pr_err("\tnlink %u\n", inode->i_nlink); + pr_err("\tuid %u\n", (unsigned int)inode->i_uid); + pr_err("\tgid %u\n", (unsigned int)inode->i_gid); + pr_err("\tatime %u.%u\n", (unsigned int)inode->i_atime.tv_sec, (unsigned int)inode->i_atime.tv_nsec); - printk(KERN_ERR "\tmtime %u.%u\n", + pr_err("\tmtime %u.%u\n", (unsigned int)inode->i_mtime.tv_sec, (unsigned int)inode->i_mtime.tv_nsec); - printk(KERN_ERR "\tctime %u.%u\n", + pr_err("\tctime %u.%u\n", (unsigned int)inode->i_ctime.tv_sec, (unsigned int)inode->i_ctime.tv_nsec); - printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum); - printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size); - printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt); - printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names); - printk(KERN_ERR "\tdirty %u\n", ui->dirty); - printk(KERN_ERR "\txattr %u\n", ui->xattr); - printk(KERN_ERR "\tbulk_read %u\n", ui->xattr); - printk(KERN_ERR "\tsynced_i_size %llu\n", + pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum); + pr_err("\txattr_size %u\n", ui->xattr_size); + pr_err("\txattr_cnt %u\n", ui->xattr_cnt); + pr_err("\txattr_names %u\n", ui->xattr_names); + pr_err("\tdirty %u\n", ui->dirty); + pr_err("\txattr %u\n", ui->xattr); + pr_err("\tbulk_read %u\n", ui->xattr); + pr_err("\tsynced_i_size %llu\n", (unsigned long long)ui->synced_i_size); - printk(KERN_ERR "\tui_size %llu\n", + pr_err("\tui_size %llu\n", (unsigned long long)ui->ui_size); - printk(KERN_ERR "\tflags %d\n", ui->flags); - printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type); - printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read); - printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row); - printk(KERN_ERR "\tdata_len %d\n", ui->data_len); + pr_err("\tflags %d\n", ui->flags); + pr_err("\tcompr_type %d\n", ui->compr_type); + pr_err("\tlast_page_read %lu\n", ui->last_page_read); + pr_err("\tread_in_a_row %lu\n", ui->read_in_a_row); + pr_err("\tdata_len %d\n", ui->data_len); if (!S_ISDIR(inode->i_mode)) return; - printk(KERN_ERR "List of directory entries:\n"); + pr_err("List of directory entries:\n"); ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); lowest_dent_key(c, &key, inode->i_ino); @@ -282,11 +282,11 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) dent = ubifs_tnc_next_ent(c, &key, &nm); if (IS_ERR(dent)) { if (PTR_ERR(dent) != -ENOENT) - printk(KERN_ERR "error %ld\n", PTR_ERR(dent)); + pr_err("error %ld\n", PTR_ERR(dent)); break; } - printk(KERN_ERR "\t%d: %s (%s)\n", + pr_err("\t%d: %s (%s)\n", count++, dent->name, get_dent_type(dent->type)); nm.name = dent->name; @@ -307,7 +307,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) /* If the magic is incorrect, just hexdump the first bytes */ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { - printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); + pr_err("Not a node, first %zu bytes:", UBIFS_CH_SZ); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, (void *)node, UBIFS_CH_SZ, 1); return; @@ -321,8 +321,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_pad_node *pad = node; - printk(KERN_ERR "\tpad_len %u\n", - le32_to_cpu(pad->pad_len)); + pr_err("\tpad_len %u\n", le32_to_cpu(pad->pad_len)); break; } case UBIFS_SB_NODE: @@ -330,112 +329,77 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_sb_node *sup = node; unsigned int sup_flags = le32_to_cpu(sup->flags); - printk(KERN_ERR "\tkey_hash %d (%s)\n", + pr_err("\tkey_hash %d (%s)\n", (int)sup->key_hash, get_key_hash(sup->key_hash)); - printk(KERN_ERR "\tkey_fmt %d (%s)\n", + pr_err("\tkey_fmt %d (%s)\n", (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); - printk(KERN_ERR "\tflags %#x\n", sup_flags); - printk(KERN_ERR "\t big_lpt %u\n", + pr_err("\tflags %#x\n", sup_flags); + pr_err("\t big_lpt %u\n", !!(sup_flags & UBIFS_FLG_BIGLPT)); - printk(KERN_ERR "\t space_fixup %u\n", + pr_err("\t space_fixup %u\n", !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); - printk(KERN_ERR "\tmin_io_size %u\n", - le32_to_cpu(sup->min_io_size)); - printk(KERN_ERR "\tleb_size %u\n", - le32_to_cpu(sup->leb_size)); - printk(KERN_ERR "\tleb_cnt %u\n", - le32_to_cpu(sup->leb_cnt)); - printk(KERN_ERR "\tmax_leb_cnt %u\n", - le32_to_cpu(sup->max_leb_cnt)); - printk(KERN_ERR "\tmax_bud_bytes %llu\n", + pr_err("\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size)); + pr_err("\tleb_size %u\n", le32_to_cpu(sup->leb_size)); + pr_err("\tleb_cnt %u\n", le32_to_cpu(sup->leb_cnt)); + pr_err("\tmax_leb_cnt %u\n", le32_to_cpu(sup->max_leb_cnt)); + pr_err("\tmax_bud_bytes %llu\n", (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); - printk(KERN_ERR "\tlog_lebs %u\n", - le32_to_cpu(sup->log_lebs)); - printk(KERN_ERR "\tlpt_lebs %u\n", - le32_to_cpu(sup->lpt_lebs)); - printk(KERN_ERR "\torph_lebs %u\n", - le32_to_cpu(sup->orph_lebs)); - printk(KERN_ERR "\tjhead_cnt %u\n", - le32_to_cpu(sup->jhead_cnt)); - printk(KERN_ERR "\tfanout %u\n", - le32_to_cpu(sup->fanout)); - printk(KERN_ERR "\tlsave_cnt %u\n", - le32_to_cpu(sup->lsave_cnt)); - printk(KERN_ERR "\tdefault_compr %u\n", + pr_err("\tlog_lebs %u\n", le32_to_cpu(sup->log_lebs)); + pr_err("\tlpt_lebs %u\n", le32_to_cpu(sup->lpt_lebs)); + pr_err("\torph_lebs %u\n", le32_to_cpu(sup->orph_lebs)); + pr_err("\tjhead_cnt %u\n", le32_to_cpu(sup->jhead_cnt)); + pr_err("\tfanout %u\n", le32_to_cpu(sup->fanout)); + pr_err("\tlsave_cnt %u\n", le32_to_cpu(sup->lsave_cnt)); + pr_err("\tdefault_compr %u\n", (int)le16_to_cpu(sup->default_compr)); - printk(KERN_ERR "\trp_size %llu\n", + pr_err("\trp_size %llu\n", (unsigned long long)le64_to_cpu(sup->rp_size)); - printk(KERN_ERR "\trp_uid %u\n", - le32_to_cpu(sup->rp_uid)); - printk(KERN_ERR "\trp_gid %u\n", - le32_to_cpu(sup->rp_gid)); - printk(KERN_ERR "\tfmt_version %u\n", - le32_to_cpu(sup->fmt_version)); - printk(KERN_ERR "\ttime_gran %u\n", - le32_to_cpu(sup->time_gran)); - printk(KERN_ERR "\tUUID %pUB\n", - sup->uuid); + pr_err("\trp_uid %u\n", le32_to_cpu(sup->rp_uid)); + pr_err("\trp_gid %u\n", le32_to_cpu(sup->rp_gid)); + pr_err("\tfmt_version %u\n", le32_to_cpu(sup->fmt_version)); + pr_err("\ttime_gran %u\n", le32_to_cpu(sup->time_gran)); + pr_err("\tUUID %pUB\n", sup->uuid); break; } case UBIFS_MST_NODE: { const struct ubifs_mst_node *mst = node; - printk(KERN_ERR "\thighest_inum %llu\n", + pr_err("\thighest_inum %llu\n", (unsigned long long)le64_to_cpu(mst->highest_inum)); - printk(KERN_ERR "\tcommit number %llu\n", + pr_err("\tcommit number %llu\n", (unsigned long long)le64_to_cpu(mst->cmt_no)); - printk(KERN_ERR "\tflags %#x\n", - le32_to_cpu(mst->flags)); - printk(KERN_ERR "\tlog_lnum %u\n", - le32_to_cpu(mst->log_lnum)); - printk(KERN_ERR "\troot_lnum %u\n", - le32_to_cpu(mst->root_lnum)); - printk(KERN_ERR "\troot_offs %u\n", - le32_to_cpu(mst->root_offs)); - printk(KERN_ERR "\troot_len %u\n", - le32_to_cpu(mst->root_len)); - printk(KERN_ERR "\tgc_lnum %u\n", - le32_to_cpu(mst->gc_lnum)); - printk(KERN_ERR "\tihead_lnum %u\n", - le32_to_cpu(mst->ihead_lnum)); - printk(KERN_ERR "\tihead_offs %u\n", - le32_to_cpu(mst->ihead_offs)); - printk(KERN_ERR "\tindex_size %llu\n", + pr_err("\tflags %#x\n", le32_to_cpu(mst->flags)); + pr_err("\tlog_lnum %u\n", le32_to_cpu(mst->log_lnum)); + pr_err("\troot_lnum %u\n", le32_to_cpu(mst->root_lnum)); + pr_err("\troot_offs %u\n", le32_to_cpu(mst->root_offs)); + pr_err("\troot_len %u\n", le32_to_cpu(mst->root_len)); + pr_err("\tgc_lnum %u\n", le32_to_cpu(mst->gc_lnum)); + pr_err("\tihead_lnum %u\n", le32_to_cpu(mst->ihead_lnum)); + pr_err("\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs)); + pr_err("\tindex_size %llu\n", (unsigned long long)le64_to_cpu(mst->index_size)); - printk(KERN_ERR "\tlpt_lnum %u\n", - le32_to_cpu(mst->lpt_lnum)); - printk(KERN_ERR "\tlpt_offs %u\n", - le32_to_cpu(mst->lpt_offs)); - printk(KERN_ERR "\tnhead_lnum %u\n", - le32_to_cpu(mst->nhead_lnum)); - printk(KERN_ERR "\tnhead_offs %u\n", - le32_to_cpu(mst->nhead_offs)); - printk(KERN_ERR "\tltab_lnum %u\n", - le32_to_cpu(mst->ltab_lnum)); - printk(KERN_ERR "\tltab_offs %u\n", - le32_to_cpu(mst->ltab_offs)); - printk(KERN_ERR "\tlsave_lnum %u\n", - le32_to_cpu(mst->lsave_lnum)); - printk(KERN_ERR "\tlsave_offs %u\n", - le32_to_cpu(mst->lsave_offs)); - printk(KERN_ERR "\tlscan_lnum %u\n", - le32_to_cpu(mst->lscan_lnum)); - printk(KERN_ERR "\tleb_cnt %u\n", - le32_to_cpu(mst->leb_cnt)); - printk(KERN_ERR "\tempty_lebs %u\n", - le32_to_cpu(mst->empty_lebs)); - printk(KERN_ERR "\tidx_lebs %u\n", - le32_to_cpu(mst->idx_lebs)); - printk(KERN_ERR "\ttotal_free %llu\n", + pr_err("\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum)); + pr_err("\tlpt_offs %u\n", le32_to_cpu(mst->lpt_offs)); + pr_err("\tnhead_lnum %u\n", le32_to_cpu(mst->nhead_lnum)); + pr_err("\tnhead_offs %u\n", le32_to_cpu(mst->nhead_offs)); + pr_err("\tltab_lnum %u\n", le32_to_cpu(mst->ltab_lnum)); + pr_err("\tltab_offs %u\n", le32_to_cpu(mst->ltab_offs)); + pr_err("\tlsave_lnum %u\n", le32_to_cpu(mst->lsave_lnum)); + pr_err("\tlsave_offs %u\n", le32_to_cpu(mst->lsave_offs)); + pr_err("\tlscan_lnum %u\n", le32_to_cpu(mst->lscan_lnum)); + pr_err("\tleb_cnt %u\n", le32_to_cpu(mst->leb_cnt)); + pr_err("\tempty_lebs %u\n", le32_to_cpu(mst->empty_lebs)); + pr_err("\tidx_lebs %u\n", le32_to_cpu(mst->idx_lebs)); + pr_err("\ttotal_free %llu\n", (unsigned long long)le64_to_cpu(mst->total_free)); - printk(KERN_ERR "\ttotal_dirty %llu\n", + pr_err("\ttotal_dirty %llu\n", (unsigned long long)le64_to_cpu(mst->total_dirty)); - printk(KERN_ERR "\ttotal_used %llu\n", + pr_err("\ttotal_used %llu\n", (unsigned long long)le64_to_cpu(mst->total_used)); - printk(KERN_ERR "\ttotal_dead %llu\n", + pr_err("\ttotal_dead %llu\n", (unsigned long long)le64_to_cpu(mst->total_dead)); - printk(KERN_ERR "\ttotal_dark %llu\n", + pr_err("\ttotal_dark %llu\n", (unsigned long long)le64_to_cpu(mst->total_dark)); break; } @@ -443,12 +407,9 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_ref_node *ref = node; - printk(KERN_ERR "\tlnum %u\n", - le32_to_cpu(ref->lnum)); - printk(KERN_ERR "\toffs %u\n", - le32_to_cpu(ref->offs)); - printk(KERN_ERR "\tjhead %u\n", - le32_to_cpu(ref->jhead)); + pr_err("\tlnum %u\n", le32_to_cpu(ref->lnum)); + pr_err("\toffs %u\n", le32_to_cpu(ref->offs)); + pr_err("\tjhead %u\n", le32_to_cpu(ref->jhead)); break; } case UBIFS_INO_NODE: @@ -456,41 +417,32 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_ino_node *ino = node; key_read(c, &ino->key, &key); - printk(KERN_ERR "\tkey %s\n", + pr_err("\tkey %s\n", dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - printk(KERN_ERR "\tcreat_sqnum %llu\n", + pr_err("\tcreat_sqnum %llu\n", (unsigned long long)le64_to_cpu(ino->creat_sqnum)); - printk(KERN_ERR "\tsize %llu\n", + pr_err("\tsize %llu\n", (unsigned long long)le64_to_cpu(ino->size)); - printk(KERN_ERR "\tnlink %u\n", - le32_to_cpu(ino->nlink)); - printk(KERN_ERR "\tatime %lld.%u\n", + pr_err("\tnlink %u\n", le32_to_cpu(ino->nlink)); + pr_err("\tatime %lld.%u\n", (long long)le64_to_cpu(ino->atime_sec), le32_to_cpu(ino->atime_nsec)); - printk(KERN_ERR "\tmtime %lld.%u\n", + pr_err("\tmtime %lld.%u\n", (long long)le64_to_cpu(ino->mtime_sec), le32_to_cpu(ino->mtime_nsec)); - printk(KERN_ERR "\tctime %lld.%u\n", + pr_err("\tctime %lld.%u\n", (long long)le64_to_cpu(ino->ctime_sec), le32_to_cpu(ino->ctime_nsec)); - printk(KERN_ERR "\tuid %u\n", - le32_to_cpu(ino->uid)); - printk(KERN_ERR "\tgid %u\n", - le32_to_cpu(ino->gid)); - printk(KERN_ERR "\tmode %u\n", - le32_to_cpu(ino->mode)); - printk(KERN_ERR "\tflags %#x\n", - le32_to_cpu(ino->flags)); - printk(KERN_ERR "\txattr_cnt %u\n", - le32_to_cpu(ino->xattr_cnt)); - printk(KERN_ERR "\txattr_size %u\n", - le32_to_cpu(ino->xattr_size)); - printk(KERN_ERR "\txattr_names %u\n", - le32_to_cpu(ino->xattr_names)); - printk(KERN_ERR "\tcompr_type %#x\n", + pr_err("\tuid %u\n", le32_to_cpu(ino->uid)); + pr_err("\tgid %u\n", le32_to_cpu(ino->gid)); + pr_err("\tmode %u\n", le32_to_cpu(ino->mode)); + pr_err("\tflags %#x\n", le32_to_cpu(ino->flags)); + pr_err("\txattr_cnt %u\n", le32_to_cpu(ino->xattr_cnt)); + pr_err("\txattr_size %u\n", le32_to_cpu(ino->xattr_size)); + pr_err("\txattr_names %u\n", le32_to_cpu(ino->xattr_names)); + pr_err("\tcompr_type %#x\n", (int)le16_to_cpu(ino->compr_type)); - printk(KERN_ERR "\tdata len %u\n", - le32_to_cpu(ino->data_len)); + pr_err("\tdata len %u\n", le32_to_cpu(ino->data_len)); break; } case UBIFS_DENT_NODE: @@ -500,21 +452,21 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) int nlen = le16_to_cpu(dent->nlen); key_read(c, &dent->key, &key); - printk(KERN_ERR "\tkey %s\n", + pr_err("\tkey %s\n", dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - printk(KERN_ERR "\tinum %llu\n", + pr_err("\tinum %llu\n", (unsigned long long)le64_to_cpu(dent->inum)); - printk(KERN_ERR "\ttype %d\n", (int)dent->type); - printk(KERN_ERR "\tnlen %d\n", nlen); - printk(KERN_ERR "\tname "); + pr_err("\ttype %d\n", (int)dent->type); + pr_err("\tnlen %d\n", nlen); + pr_err("\tname "); if (nlen > UBIFS_MAX_NLEN) - printk(KERN_ERR "(bad name length, not printing, bad or corrupted node)"); + pr_err("(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) - printk(KERN_CONT "%c", dent->name[i]); + pr_cont("%c", dent->name[i]); } - printk(KERN_CONT "\n"); + pr_cont("\n"); break; } @@ -524,15 +476,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; key_read(c, &dn->key, &key); - printk(KERN_ERR "\tkey %s\n", + pr_err("\tkey %s\n", dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - printk(KERN_ERR "\tsize %u\n", - le32_to_cpu(dn->size)); - printk(KERN_ERR "\tcompr_typ %d\n", + pr_err("\tsize %u\n", le32_to_cpu(dn->size)); + pr_err("\tcompr_typ %d\n", (int)le16_to_cpu(dn->compr_type)); - printk(KERN_ERR "\tdata size %d\n", - dlen); - printk(KERN_ERR "\tdata:\n"); + pr_err("\tdata size %d\n", dlen); + pr_err("\tdata:\n"); print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, (void *)&dn->data, dlen, 0); break; @@ -541,11 +491,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_trun_node *trun = node; - printk(KERN_ERR "\tinum %u\n", - le32_to_cpu(trun->inum)); - printk(KERN_ERR "\told_size %llu\n", + pr_err("\tinum %u\n", le32_to_cpu(trun->inum)); + pr_err("\told_size %llu\n", (unsigned long long)le64_to_cpu(trun->old_size)); - printk(KERN_ERR "\tnew_size %llu\n", + pr_err("\tnew_size %llu\n", (unsigned long long)le64_to_cpu(trun->new_size)); break; } @@ -554,17 +503,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_idx_node *idx = node; n = le16_to_cpu(idx->child_cnt); - printk(KERN_ERR "\tchild_cnt %d\n", n); - printk(KERN_ERR "\tlevel %d\n", - (int)le16_to_cpu(idx->level)); - printk(KERN_ERR "\tBranches:\n"); + pr_err("\tchild_cnt %d\n", n); + pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level)); + pr_err("\tBranches:\n"); for (i = 0; i < n && i < c->fanout - 1; i++) { const struct ubifs_branch *br; br = ubifs_idx_branch(c, idx, i); key_read(c, &br->key, &key); - printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n", + pr_err("\t%d: LEB %d:%d len %d key %s\n", i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), le32_to_cpu(br->len), dbg_snprintf_key(c, &key, key_buf, @@ -578,20 +526,20 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_orph_node *orph = node; - printk(KERN_ERR "\tcommit number %llu\n", + pr_err("\tcommit number %llu\n", (unsigned long long) le64_to_cpu(orph->cmt_no) & LLONG_MAX); - printk(KERN_ERR "\tlast node flag %llu\n", + pr_err("\tlast node flag %llu\n", (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; - printk(KERN_ERR "\t%d orphan inode numbers:\n", n); + pr_err("\t%d orphan inode numbers:\n", n); for (i = 0; i < n; i++) - printk(KERN_ERR "\t ino %llu\n", + pr_err("\t ino %llu\n", (unsigned long long)le64_to_cpu(orph->inos[i])); break; } default: - printk(KERN_ERR "node type %d was not recognized\n", + pr_err("node type %d was not recognized\n", (int)ch->node_type); } spin_unlock(&dbg_lock); @@ -600,16 +548,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) void ubifs_dump_budget_req(const struct ubifs_budget_req *req) { spin_lock(&dbg_lock); - printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n", + pr_err("Budgeting request: new_ino %d, dirtied_ino %d\n", req->new_ino, req->dirtied_ino); - printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n", + pr_err("\tnew_ino_d %d, dirtied_ino_d %d\n", req->new_ino_d, req->dirtied_ino_d); - printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n", + pr_err("\tnew_page %d, dirtied_page %d\n", req->new_page, req->dirtied_page); - printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n", + pr_err("\tnew_dent %d, mod_dent %d\n", req->new_dent, req->mod_dent); - printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth); - printk(KERN_ERR "\tdata_growth %d dd_growth %d\n", + pr_err("\tidx_growth %d\n", req->idx_growth); + pr_err("\tdata_growth %d dd_growth %d\n", req->data_growth, req->dd_growth); spin_unlock(&dbg_lock); } @@ -617,11 +565,11 @@ void ubifs_dump_budget_req(const struct ubifs_budget_req *req) void ubifs_dump_lstats(const struct ubifs_lp_stats *lst) { spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n", + pr_err("(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); - printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n", + pr_err("\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, lst->total_dirty); - printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, total_dead %lld\n", + pr_err("\ttotal_used %lld, total_dark %lld, total_dead %lld\n", lst->total_used, lst->total_dark, lst->total_dead); spin_unlock(&dbg_lock); } @@ -636,18 +584,17 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) spin_lock(&c->space_lock); spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n", + pr_err("(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n", current->pid, bi->data_growth + bi->dd_growth, bi->data_growth + bi->dd_growth + bi->idx_growth); - printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n", + pr_err("\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, bi->idx_growth); - printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n", + pr_err("\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx); - printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n", + pr_err("\tpage_budget %d, inode_budget %d, dent_budget %d\n", bi->page_budget, bi->inode_budget, bi->dent_budget); - printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", - bi->nospace, bi->nospace_rp); - printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", + pr_err("\tnospace %u, nospace_rp %u\n", bi->nospace, bi->nospace_rp); + pr_err("\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", c->dark_wm, c->dead_wm, c->max_idx_node_sz); if (bi != &c->bi) @@ -658,38 +605,37 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) */ goto out_unlock; - printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", + pr_err("\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); - printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n", + pr_err("\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), atomic_long_read(&c->dirty_zn_cnt), atomic_long_read(&c->clean_zn_cnt)); - printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", - c->gc_lnum, c->ihead_lnum); + pr_err("\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); /* If we are in R/O mode, journal heads do not exist */ if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_ERR "\tjhead %s\t LEB %d\n", + pr_err("\tjhead %s\t LEB %d\n", dbg_jhead(c->jheads[i].wbuf.jhead), c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); - printk(KERN_ERR "\tbud LEB %d\n", bud->lnum); + pr_err("\tbud LEB %d\n", bud->lnum); } list_for_each_entry(bud, &c->old_buds, list) - printk(KERN_ERR "\told bud LEB %d\n", bud->lnum); + pr_err("\told bud LEB %d\n", bud->lnum); list_for_each_entry(idx_gc, &c->idx_gc, list) - printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n", + pr_err("\tGC'ed idx LEB %d unmap %d\n", idx_gc->lnum, idx_gc->unmap); - printk(KERN_ERR "\tcommit state %d\n", c->cmt_state); + pr_err("\tcommit state %d\n", c->cmt_state); /* Print budgeting predictions */ available = ubifs_calc_available(c, c->bi.min_idx_lebs); outstanding = c->bi.data_growth + c->bi.dd_growth; free = ubifs_get_free_space_nolock(c); - printk(KERN_ERR "Budgeting predictions:\n"); - printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n", + pr_err("Budgeting predictions:\n"); + pr_err("\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); out_unlock: spin_unlock(&dbg_lock); @@ -709,19 +655,19 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) dark = ubifs_calc_dark(c, spc); if (lp->flags & LPROPS_INDEX) - printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (", + pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (", lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, lp->flags); else - printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (", + pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (", lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); if (lp->flags & LPROPS_TAKEN) { if (lp->flags & LPROPS_INDEX) - printk(KERN_CONT "index, taken"); + pr_cont("index, taken"); else - printk(KERN_CONT "taken"); + pr_cont("taken"); } else { const char *s; @@ -758,7 +704,7 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) break; } } - printk(KERN_CONT "%s", s); + pr_cont("%s", s); } for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { @@ -773,19 +719,18 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) */ if (c->jheads && lp->lnum == c->jheads[i].wbuf.lnum) { - printk(KERN_CONT ", jhead %s", - dbg_jhead(i)); + pr_cont(", jhead %s", dbg_jhead(i)); head = 1; } } if (!head) - printk(KERN_CONT ", bud of jhead %s", + pr_cont(", bud of jhead %s", dbg_jhead(bud->jhead)); } } if (lp->lnum == c->gc_lnum) - printk(KERN_CONT ", GC LEB"); - printk(KERN_CONT ")\n"); + pr_cont(", GC LEB"); + pr_cont(")\n"); } void ubifs_dump_lprops(struct ubifs_info *c) @@ -794,8 +739,7 @@ void ubifs_dump_lprops(struct ubifs_info *c) struct ubifs_lprops lp; struct ubifs_lp_stats lst; - printk(KERN_ERR "(pid %d) start dumping LEB properties\n", - current->pid); + pr_err("(pid %d) start dumping LEB properties\n", current->pid); ubifs_get_lp_stats(c, &lst); ubifs_dump_lstats(&lst); @@ -806,8 +750,7 @@ void ubifs_dump_lprops(struct ubifs_info *c) ubifs_dump_lprop(c, &lp); } - printk(KERN_ERR "(pid %d) finish dumping LEB properties\n", - current->pid); + pr_err("(pid %d) finish dumping LEB properties\n", current->pid); } void ubifs_dump_lpt_info(struct ubifs_info *c) @@ -815,35 +758,34 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) int i; spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid); - printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz); - printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz); - printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz); - printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz); - printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz); - printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt); - printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght); - printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt); - printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt); - printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); - printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); - printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt); - printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits); - printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); - printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); - printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); - printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits); - printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits); - printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); - printk(KERN_ERR "\tLPT head is at %d:%d\n", + pr_err("(pid %d) dumping LPT information\n", current->pid); + pr_err("\tlpt_sz: %lld\n", c->lpt_sz); + pr_err("\tpnode_sz: %d\n", c->pnode_sz); + pr_err("\tnnode_sz: %d\n", c->nnode_sz); + pr_err("\tltab_sz: %d\n", c->ltab_sz); + pr_err("\tlsave_sz: %d\n", c->lsave_sz); + pr_err("\tbig_lpt: %d\n", c->big_lpt); + pr_err("\tlpt_hght: %d\n", c->lpt_hght); + pr_err("\tpnode_cnt: %d\n", c->pnode_cnt); + pr_err("\tnnode_cnt: %d\n", c->nnode_cnt); + pr_err("\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); + pr_err("\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); + pr_err("\tlsave_cnt: %d\n", c->lsave_cnt); + pr_err("\tspace_bits: %d\n", c->space_bits); + pr_err("\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); + pr_err("\tlpt_offs_bits: %d\n", c->lpt_offs_bits); + pr_err("\tlpt_spc_bits: %d\n", c->lpt_spc_bits); + pr_err("\tpcnt_bits: %d\n", c->pcnt_bits); + pr_err("\tlnum_bits: %d\n", c->lnum_bits); + pr_err("\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); + pr_err("\tLPT head is at %d:%d\n", c->nhead_lnum, c->nhead_offs); - printk(KERN_ERR "\tLPT ltab is at %d:%d\n", - c->ltab_lnum, c->ltab_offs); + pr_err("\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); if (c->big_lpt) - printk(KERN_ERR "\tLPT lsave is at %d:%d\n", + pr_err("\tLPT lsave is at %d:%d\n", c->lsave_lnum, c->lsave_offs); for (i = 0; i < c->lpt_lebs; i++) - printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n", + pr_err("\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n", i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); spin_unlock(&dbg_lock); @@ -854,12 +796,12 @@ void ubifs_dump_sleb(const struct ubifs_info *c, { struct ubifs_scan_node *snod; - printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n", + pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n", current->pid, sleb->lnum, offs); list_for_each_entry(snod, &sleb->nodes, list) { cond_resched(); - printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", + pr_err("Dumping node at LEB %d:%d len %d\n", sleb->lnum, snod->offs, snod->len); ubifs_dump_node(c, snod->node); } @@ -871,8 +813,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) struct ubifs_scan_node *snod; void *buf; - printk(KERN_ERR "(pid %d) start dumping LEB %d\n", - current->pid, lnum); + pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); if (!buf) { @@ -886,18 +827,17 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) goto out; } - printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum, + pr_err("LEB %d has %d nodes ending at %d\n", lnum, sleb->nodes_cnt, sleb->endpt); list_for_each_entry(snod, &sleb->nodes, list) { cond_resched(); - printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum, + pr_err("Dumping node at LEB %d:%d len %d\n", lnum, snod->offs, snod->len); ubifs_dump_node(c, snod->node); } - printk(KERN_ERR "(pid %d) finish dumping LEB %d\n", - current->pid, lnum); + pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); ubifs_scan_destroy(sleb); out: @@ -918,7 +858,7 @@ void ubifs_dump_znode(const struct ubifs_info *c, else zbr = &c->zroot; - printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n", + pr_err("znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip, znode->level, znode->child_cnt, znode->flags); @@ -927,16 +867,16 @@ void ubifs_dump_znode(const struct ubifs_info *c, return; } - printk(KERN_ERR "zbranches:\n"); + pr_err("zbranches:\n"); for (n = 0; n < znode->child_cnt; n++) { zbr = &znode->zbranch[n]; if (znode->level > 0) - printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key %s\n", + pr_err("\t%d: znode %p LEB %d:%d len %d key %s\n", n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, dbg_snprintf_key(c, &zbr->key, key_buf, DBG_KEY_BUF_LEN)); else - printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key %s\n", + pr_err("\t%d: LNC %p LEB %d:%d len %d key %s\n", n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, dbg_snprintf_key(c, &zbr->key, key_buf, DBG_KEY_BUF_LEN)); @@ -948,16 +888,16 @@ void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) { int i; - printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n", + pr_err("(pid %d) start dumping heap cat %d (%d elements)\n", current->pid, cat, heap->cnt); for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; - printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d flags %d\n", + pr_err("\t%d. LEB %d hpos %d free %d dirty %d flags %d\n", i, lprops->lnum, lprops->hpos, lprops->free, lprops->dirty, lprops->flags); } - printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); + pr_err("(pid %d) finish dumping heap\n", current->pid); } void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, @@ -965,15 +905,15 @@ void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, { int i; - printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid); - printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n", + pr_err("(pid %d) dumping pnode:\n", current->pid); + pr_err("\taddress %zx parent %zx cnext %zx\n", (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); - printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n", + pr_err("\tflags %lu iip %d level %d num %d\n", pnode->flags, iip, pnode->level, pnode->num); for (i = 0; i < UBIFS_LPT_FANOUT; i++) { struct ubifs_lprops *lp = &pnode->lprops[i]; - printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n", + pr_err("\t%d: free %d dirty %d flags %d lnum %d\n", i, lp->free, lp->dirty, lp->flags, lp->lnum); } } @@ -983,20 +923,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) struct ubifs_znode *znode; int level; - printk(KERN_ERR "\n"); - printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid); + pr_err("\n"); + pr_err("(pid %d) start dumping TNC tree\n", current->pid); znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); level = znode->level; - printk(KERN_ERR "== Level %d ==\n", level); + pr_err("== Level %d ==\n", level); while (znode) { if (level != znode->level) { level = znode->level; - printk(KERN_ERR "== Level %d ==\n", level); + pr_err("== Level %d ==\n", level); } ubifs_dump_znode(c, znode); znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); } - printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid); + pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 46b8dd123585..38230b1e544e 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -150,7 +150,7 @@ struct ubifs_global_debug_info { #define ubifs_assert(expr) do { \ if (unlikely(!(expr))) { \ - printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ dump_stack(); \ } \ @@ -159,7 +159,7 @@ struct ubifs_global_debug_info { #define ubifs_assert_cmt_locked(c) do { \ if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ up_write(&(c)->commit_sem); \ - printk(KERN_CRIT "commit lock is not locked!\n"); \ + pr_crit("commit lock is not locked!\n"); \ ubifs_assert(0); \ } \ } while (0) @@ -178,7 +178,7 @@ struct ubifs_global_debug_info { /* Just a debugging messages not related to any specific UBIFS subsystem */ #define dbg_msg(fmt, ...) \ - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ + pr_err("UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ __func__, ##__VA_ARGS__) /* General messages */ diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 74082553e1fc..a1de3cf9dba2 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1886,8 +1886,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) int err, len = c->leb_size, node_type, node_num, node_len, offs; void *buf, *p; - printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", - current->pid, lnum); + pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); if (!buf) { ubifs_err("cannot allocate memory to dump LPT"); @@ -1905,14 +1904,14 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pad_len = get_pad_len(c, p, len); if (pad_len) { - printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", + pr_err("LEB %d:%d, pad %d bytes\n", lnum, offs, pad_len); p += pad_len; len -= pad_len; continue; } if (len) - printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", + pr_err("LEB %d:%d, free %d bytes\n", lnum, offs, len); break; } @@ -1923,11 +1922,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) { node_len = c->pnode_sz; if (c->big_lpt) - printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", + pr_err("LEB %d:%d, pnode num %d\n", lnum, offs, node_num); else - printk(KERN_DEBUG "LEB %d:%d, pnode\n", - lnum, offs); + pr_err("LEB %d:%d, pnode\n", lnum, offs); break; } case UBIFS_LPT_NNODE: @@ -1937,29 +1935,28 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) node_len = c->nnode_sz; if (c->big_lpt) - printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", + pr_err("LEB %d:%d, nnode num %d, ", lnum, offs, node_num); else - printk(KERN_DEBUG "LEB %d:%d, nnode, ", + pr_err("LEB %d:%d, nnode, ", lnum, offs); err = ubifs_unpack_nnode(c, p, &nnode); for (i = 0; i < UBIFS_LPT_FANOUT; i++) { - printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, + pr_cont("%d:%d", nnode.nbranch[i].lnum, nnode.nbranch[i].offs); if (i != UBIFS_LPT_FANOUT - 1) - printk(KERN_CONT ", "); + pr_cont(", "); } - printk(KERN_CONT "\n"); + pr_cont("\n"); break; } case UBIFS_LPT_LTAB: node_len = c->ltab_sz; - printk(KERN_DEBUG "LEB %d:%d, ltab\n", - lnum, offs); + pr_err("LEB %d:%d, ltab\n", lnum, offs); break; case UBIFS_LPT_LSAVE: node_len = c->lsave_sz; - printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); + pr_err("LEB %d:%d, lsave len\n", lnum, offs); break; default: ubifs_err("LPT node type %d not recognized", node_type); @@ -1970,8 +1967,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) len -= node_len; } - printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", - current->pid, lnum); + pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); out: vfree(buf); return; @@ -1988,12 +1984,10 @@ void ubifs_dump_lpt_lebs(const struct ubifs_info *c) { int i; - printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", - current->pid); + pr_err("(pid %d) start dumping all LPT LEBs\n", current->pid); for (i = 0; i < c->lpt_lebs; i++) dump_lpt_leb(c, i + c->lpt_first); - printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", - current->pid); + pr_err("(pid %d) finish dumping all LPT LEBs\n", current->pid); } /** diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1e5a08623d11..e6a9275fadb0 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -42,16 +42,15 @@ #define UBIFS_VERSION 1 /* Normal UBIFS messages */ -#define ubifs_msg(fmt, ...) \ - printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) +#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) /* UBIFS error messages */ -#define ubifs_err(fmt, ...) \ - printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ +#define ubifs_err(fmt, ...) \ + pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ __func__, ##__VA_ARGS__) /* UBIFS warning messages */ -#define ubifs_warn(fmt, ...) \ - printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ - current->pid, __func__, ##__VA_ARGS__) +#define ubifs_warn(fmt, ...) \ + pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) /* UBIFS file system VFS magic number */ #define UBIFS_SUPER_MAGIC 0x24051905 -- cgit v1.2.3 From 3668b70fcf1fdc6799abf15f70fe3f50f407ec82 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 27 Aug 2012 16:56:58 +0300 Subject: UBIFS: print less MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UBIFS currently prints a lot of information when it mounts a volume, which bothers some people. Make it less chatty - print only important information by default. Get rid of 'dbg_msg()' macro completely. Reported-by: Uwe Kleine-König Signed-off-by: Artem Bityutskiy --- fs/ubifs/commit.c | 6 ++-- fs/ubifs/debug.h | 5 --- fs/ubifs/lprops.c | 8 ++--- fs/ubifs/lpt.c | 2 +- fs/ubifs/lpt_commit.c | 12 +++---- fs/ubifs/replay.c | 2 +- fs/ubifs/super.c | 91 ++++++++++++++++++++++++--------------------------- fs/ubifs/tnc_misc.c | 4 +-- 8 files changed, 60 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index a054cafd614b..ff8229340cd5 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -293,8 +293,8 @@ int ubifs_bg_thread(void *info) int err; struct ubifs_info *c = info; - dbg_msg("background thread \"%s\" started, PID %d", - c->bgt_name, current->pid); + ubifs_msg("background thread \"%s\" started, PID %d", + c->bgt_name, current->pid); set_freezable(); while (1) { @@ -328,7 +328,7 @@ int ubifs_bg_thread(void *info) cond_resched(); } - dbg_msg("background thread \"%s\" stops", c->bgt_name); + ubifs_msg("background thread \"%s\" stops", c->bgt_name); return 0; } diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 38230b1e544e..e03d5179769a 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -176,11 +176,6 @@ struct ubifs_global_debug_info { dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ } while (0) -/* Just a debugging messages not related to any specific UBIFS subsystem */ -#define dbg_msg(fmt, ...) \ - pr_err("UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ - __func__, ##__VA_ARGS__) - /* General messages */ #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Additional journal messages */ diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 132b8e3e7b0b..e5a2a35a46dc 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -982,9 +982,9 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, goto out; } if (lprops != lp) { - dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", - (size_t)lprops, (size_t)lp, lprops->lnum, - lp->lnum); + ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", + (size_t)lprops, (size_t)lp, lprops->lnum, + lp->lnum); err = 4; goto out; } @@ -1002,7 +1002,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, } out: if (err) { - dbg_msg("failed cat %d hpos %d err %d", cat, i, err); + ubifs_err("failed cat %d hpos %d err %d", cat, i, err); dump_stack(); ubifs_dump_heap(c, heap, cat); } diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index c3e6bbf13ba5..d46b19ec1815 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1311,7 +1311,7 @@ out: ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); ubifs_dump_pnode(c, pnode, parent, iip); dump_stack(); - dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index a1de3cf9dba2..9daaeef675dd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1662,19 +1662,19 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) continue; } if (!dbg_is_all_ff(p, len)) { - dbg_msg("invalid empty space in LEB %d at %d", - lnum, c->leb_size - len); + ubifs_err("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); err = -EINVAL; } i = lnum - c->lpt_first; if (len != c->ltab[i].free) { - dbg_msg("invalid free space in LEB %d (free %d, expected %d)", - lnum, len, c->ltab[i].free); + ubifs_err("invalid free space in LEB %d (free %d, expected %d)", + lnum, len, c->ltab[i].free); err = -EINVAL; } if (dirty != c->ltab[i].dirty) { - dbg_msg("invalid dirty space in LEB %d (dirty %d, expected %d)", - lnum, dirty, c->ltab[i].dirty); + ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)", + lnum, dirty, c->ltab[i].dirty); err = -EINVAL; } goto out; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ede6e5835ced..3187925e9879 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -141,7 +141,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) * during the replay. */ if (dirty != 0) - dbg_msg("LEB %d lp: %d free %d dirty replay: %d free %d dirty", + dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", b->bud->lnum, lp->free, lp->dirty, b->free, b->dirty); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 448bf24ef8d0..7d51f2802bda 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1160,7 +1160,7 @@ static int check_free_space(struct ubifs_info *c) static int mount_ubifs(struct ubifs_info *c) { int err; - long long x; + long long x, y; size_t sz; c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); @@ -1410,74 +1410,69 @@ static int mount_ubifs(struct ubifs_info *c) c->mounting = 0; - ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", - c->vi.ubi_num, c->vi.vol_id, c->vi.name); - if (c->ro_mount) - ubifs_msg("mounted read-only"); + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", + c->vi.ubi_num, c->vi.vol_id, c->vi.name, + c->ro_mount ? ", R/O mode" : NULL); x = (long long)c->main_lebs * c->leb_size; - ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", - x, x >> 10, x >> 20, c->main_lebs); - x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; - ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", - x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); - ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", + y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; + ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", + c->leb_size, c->leb_size >> 10, c->min_io_size, + c->max_write_size); + ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", + x, x >> 20, c->main_lebs, + y, y >> 20, c->log_lebs + c->max_bud_cnt); + ubifs_msg("reserved for root: %llu bytes (%llu KiB)", + c->report_rp_size, c->report_rp_size >> 10); + ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s", c->fmt_version, c->ro_compat_version, - UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); - ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); - ubifs_msg("reserved for root: %llu bytes (%llu KiB)", - c->report_rp_size, c->report_rp_size >> 10); - - dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); - dbg_msg("max. write size: %d bytes", c->max_write_size); - dbg_msg("LEB size: %d bytes (%d KiB)", - c->leb_size, c->leb_size >> 10); - dbg_msg("data journal heads: %d", + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid, + c->big_lpt ? ", big LPT model" : ", small LPT model"); + + dbg_gen("default compressor: %s", ubifs_compr_name(c->default_compr)); + dbg_gen("data journal heads: %d", c->jhead_cnt - NONDATA_JHEADS_CNT); - dbg_msg("UUID: %pUB", c->uuid); - dbg_msg("big_lpt %d", c->big_lpt); - dbg_msg("log LEBs: %d (%d - %d)", + dbg_gen("log LEBs: %d (%d - %d)", c->log_lebs, UBIFS_LOG_LNUM, c->log_last); - dbg_msg("LPT area LEBs: %d (%d - %d)", + dbg_gen("LPT area LEBs: %d (%d - %d)", c->lpt_lebs, c->lpt_first, c->lpt_last); - dbg_msg("orphan area LEBs: %d (%d - %d)", + dbg_gen("orphan area LEBs: %d (%d - %d)", c->orph_lebs, c->orph_first, c->orph_last); - dbg_msg("main area LEBs: %d (%d - %d)", + dbg_gen("main area LEBs: %d (%d - %d)", c->main_lebs, c->main_first, c->leb_cnt - 1); - dbg_msg("index LEBs: %d", c->lst.idx_lebs); - dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("index LEBs: %d", c->lst.idx_lebs); + dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)", c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, c->bi.old_idx_sz >> 20); - dbg_msg("key hash type: %d", c->key_hash_type); - dbg_msg("tree fanout: %d", c->fanout); - dbg_msg("reserved GC LEB: %d", c->gc_lnum); - dbg_msg("first main LEB: %d", c->main_first); - dbg_msg("max. znode size %d", c->max_znode_sz); - dbg_msg("max. index node size %d", c->max_idx_node_sz); - dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", + dbg_gen("key hash type: %d", c->key_hash_type); + dbg_gen("tree fanout: %d", c->fanout); + dbg_gen("reserved GC LEB: %d", c->gc_lnum); + dbg_gen("max. znode size %d", c->max_znode_sz); + dbg_gen("max. index node size %d", c->max_idx_node_sz); + dbg_gen("node sizes: data %zu, inode %zu, dentry %zu", UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); - dbg_msg("node sizes: trun %zu, sb %zu, master %zu", + dbg_gen("node sizes: trun %zu, sb %zu, master %zu", UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); - dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", + dbg_gen("node sizes: ref %zu, cmt. start %zu, orph %zu", UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); - dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", + dbg_gen("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); - dbg_msg("dead watermark: %d", c->dead_wm); - dbg_msg("dark watermark: %d", c->dark_wm); - dbg_msg("LEB overhead: %d", c->leb_overhead); + dbg_gen("dead watermark: %d", c->dead_wm); + dbg_gen("dark watermark: %d", c->dark_wm); + dbg_gen("LEB overhead: %d", c->leb_overhead); x = (long long)c->main_lebs * c->dark_wm; - dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", + dbg_gen("max. dark space: %lld (%lld KiB, %lld MiB)", x, x >> 10, x >> 20); - dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("maximum bud bytes: %lld (%lld KiB, %lld MiB)", c->max_bud_bytes, c->max_bud_bytes >> 10, c->max_bud_bytes >> 20); - dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", c->bg_bud_bytes, c->bg_bud_bytes >> 10, c->bg_bud_bytes >> 20); - dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", + dbg_gen("current bud bytes %lld (%lld KiB, %lld MiB)", c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); - dbg_msg("max. seq. number: %llu", c->max_sqnum); - dbg_msg("commit number: %llu", c->cmt_no); + dbg_gen("max. seq. number: %llu", c->max_sqnum); + dbg_gen("commit number: %llu", c->cmt_no); return 0; diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index d38ac7f9654b..f6bf8995c7b1 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, case UBIFS_XENT_KEY: break; default: - dbg_msg("bad key type at slot %d: %d", - i, key_type(c, &zbr->key)); + ubifs_err("bad key type at slot %d: %d", + i, key_type(c, &zbr->key)); err = 3; goto out_dump; } -- cgit v1.2.3 From b4a871bce619dc5ca03cc6c78e1c467ceacb8e7e Mon Sep 17 00:00:00 2001 From: Jovi Zhang Date: Mon, 20 Aug 2012 14:58:26 +0800 Subject: pstore/ram: Add missing platform_device_unregister We need to unregister platform device when module exit, this commit fixes the issue. Signed-off-by: Jovi Zhang Acked-by: Kees Cook Signed-off-by: Anton Vorontsov --- fs/pstore/ram.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 91016049e551..1a4f6da58eab 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -538,6 +538,7 @@ postcore_initcall(ramoops_init); static void __exit ramoops_exit(void) { platform_driver_unregister(&ramoops_driver); + platform_device_unregister(dummy); kfree(dummy_data); } module_exit(ramoops_exit); -- cgit v1.2.3 From 8bad3c024496e30948f576485c10ecc9ebdfe41d Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 18 Jun 2012 12:14:23 +0800 Subject: btrfs: fix comment typo in btrfs_finish_ordered_io Fix typo errors in comments of btrfs_finish_ordered_io. Signed-off-by: Liu Bo Signed-off-by: Jiri Kosina --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fb8d671d00e6..d8ed4fda0015 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1969,8 +1969,8 @@ out: ordered_extent->len - 1, NULL, GFP_NOFS); /* - * This needs to be dont to make sure anybody waiting knows we are done - * upating everything for this ordered extent. + * This needs to be done to make sure anybody waiting knows we are done + * updating everything for this ordered extent. */ btrfs_remove_ordered_extent(inode, ordered_extent); -- cgit v1.2.3 From 1856b225ca1f80446938c9ec4a0b330c1772ec45 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Sat, 18 Aug 2012 17:38:54 +0200 Subject: nfs: comment fix Signed-off-by: Peter Meerwald Signed-off-by: Jiri Kosina --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8b2a2977b720..120d8e98ee5f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1590,7 +1590,7 @@ static int nfs_parse_mount_options(char *raw, /* * verify that any proto=/mountproto= options match the address - * familiies in the addr=/mountaddr= options. + * families in the addr=/mountaddr= options. */ if (protofamily != AF_UNSPEC && protofamily != mnt->nfs_server.address.ss_family) -- cgit v1.2.3 From 4907cb7b193a4f91c1fd30cf679c035e3644c64d Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Sat, 1 Sep 2012 10:31:09 -0700 Subject: treewide: fix comment/printk/variable typos Signed-off-by: Anatol Pomozov Signed-off-by: Jiri Kosina --- drivers/dma/intel_mid_dma.c | 2 +- drivers/net/ethernet/3com/typhoon.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 2 +- drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c | 6 +++--- drivers/net/wireless/rtlwifi/rtl8192de/fw.c | 6 +++--- drivers/scsi/bfa/bfa_ioc.c | 2 +- drivers/scsi/bfa/bfa_ioc.h | 2 +- drivers/scsi/isci/init.c | 2 +- drivers/scsi/isci/request.c | 2 +- drivers/scsi/isci/task.c | 2 +- drivers/scsi/lpfc/lpfc_init.c | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 4 ++-- drivers/video/exynos/exynos_mipi_dsi.c | 2 +- drivers/w1/masters/ds1wm.c | 2 +- fs/ext2/balloc.c | 2 +- fs/ext3/balloc.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext4/inode.c | 14 +++++++------- fs/ext4/mballoc.c | 2 +- include/trace/define_trace.h | 2 +- net/8021q/vlanproc.c | 2 +- 22 files changed, 33 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index 222e907bfaaa..02b21d7d38e5 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -427,7 +427,7 @@ DMA engine callback Functions*/ * intel_mid_dma_tx_submit - callback to submit DMA transaction * @tx: dma engine descriptor * - * Submit the DMA trasaction for this descriptor, start if ch idle + * Submit the DMA transaction for this descriptor, start if ch idle */ static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx) { diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index b15366635147..bb9670f29b59 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -364,7 +364,7 @@ typhoon_inc_rxfree_index(u32 *index, const int count) static inline void typhoon_inc_tx_index(u32 *index, const int count) { - /* if we start using the Hi Tx ring, this needs updateing */ + /* if we start using the Hi Tx ring, this needs updating */ typhoon_inc_index(index, count, TXLO_ENTRIES); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 77bcd4cb4ffb..dfa3ee2ddba4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1458,7 +1458,7 @@ struct bnx2x { int fw_stats_req_sz; /* - * FW statistics data shortcut (points at the begining of + * FW statistics data shortcut (points at the beginning of * fw_stats buffer + fw_stats_req_sz). */ struct bnx2x_fw_stats_data *fw_stats_data; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index f83e033da6da..acf2fe4ca608 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -1321,7 +1321,7 @@ void bnx2x_init_mcast_obj(struct bnx2x *bp, * the current command will be enqueued to the tail of the * pending commands list. * - * Return: 0 is operation was sucessfull and there are no pending completions, + * Return: 0 is operation was successfull and there are no pending completions, * negative if there were errors, positive if there are pending * completions. */ diff --git a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c index 44febfde9493..8a7b864faca3 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c +++ b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c @@ -315,7 +315,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, u16 box_reg = 0, box_extreg = 0; u8 u1b_tmp; bool isfw_read = false; - bool bwrite_sucess = false; + bool bwrite_success = false; u8 wait_h2c_limmit = 100; u8 wait_writeh2c_limmit = 100; u8 boxcontent[4], boxextcontent[2]; @@ -354,7 +354,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, } } - while (!bwrite_sucess) { + while (!bwrite_success) { wait_writeh2c_limmit--; if (wait_writeh2c_limmit == 0) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, @@ -491,7 +491,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; } - bwrite_sucess = true; + bwrite_success = true; rtlhal->last_hmeboxnum = boxnum + 1; if (rtlhal->last_hmeboxnum == 4) diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/rtlwifi/rtl8192de/fw.c index 895ae6c1f354..eb22dccc418b 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192de/fw.c @@ -365,7 +365,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, u8 u1b_tmp; bool isfw_read = false; u8 buf_index = 0; - bool bwrite_sucess = false; + bool bwrite_success = false; u8 wait_h2c_limmit = 100; u8 wait_writeh2c_limmit = 100; u8 boxcontent[4], boxextcontent[2]; @@ -408,7 +408,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; } } - while (!bwrite_sucess) { + while (!bwrite_success) { wait_writeh2c_limmit--; if (wait_writeh2c_limmit == 0) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, @@ -515,7 +515,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, "switch case not processed\n"); break; } - bwrite_sucess = true; + bwrite_success = true; rtlhal->last_hmeboxnum = boxnum + 1; if (rtlhal->last_hmeboxnum == 4) rtlhal->last_hmeboxnum = 0; diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c index 14e6284e48e4..6faff0053370 100644 --- a/drivers/scsi/bfa/bfa_ioc.c +++ b/drivers/scsi/bfa/bfa_ioc.c @@ -5587,7 +5587,7 @@ static bfa_status_t bfa_dconf_flash_write(struct bfa_dconf_mod_s *dconf); static void bfa_dconf_init_cb(void *arg, bfa_status_t status); /* - * Begining state of dconf module. Waiting for an event to start. + * Beginning state of dconf module. Waiting for an event to start. */ static void bfa_dconf_sm_uninit(struct bfa_dconf_mod_s *dconf, enum bfa_dconf_event event) diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h index 1a99d4b5b50f..7b916e04ca56 100644 --- a/drivers/scsi/bfa/bfa_ioc.h +++ b/drivers/scsi/bfa/bfa_ioc.h @@ -530,7 +530,7 @@ struct bfa_diag_results_fwping { struct bfa_diag_qtest_result_s { u32 status; - u16 count; /* sucessful queue test count */ + u16 count; /* successful queue test count */ u8 queue; u8 rsvd; /* 64-bit align */ }; diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 47e28b555029..8912567988cf 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -219,7 +219,7 @@ static struct sas_domain_function_template isci_transport_ops = { * @isci_host: This parameter specifies the lldd specific wrapper for the * libsas sas_ha struct. * - * This method returns an error code indicating sucess or failure. The user + * This method returns an error code indicating success or failure. The user * should check for possible memory allocation error return otherwise, a zero * indicates success. */ diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 7a0431c73493..c1bafc3f3fb1 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -2240,7 +2240,7 @@ static enum sci_status atapi_data_tc_completion_handler(struct isci_request *ire status = ireq->sci_status; sci_change_state(&idev->sm, SCI_STP_DEV_ATAPI_ERROR); } else { - /* If receiving any non-sucess TC status, no UF + /* If receiving any non-success TC status, no UF * received yet, then an UF for the status fis * is coming after (XXX: suspect this is * actually a protocol error or a bug like the diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 6bc74eb012c9..b6f19a1db780 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -532,7 +532,7 @@ int isci_task_abort_task(struct sas_task *task) /* The request has already completed and there * is nothing to do here other than to set the task * done bit, and indicate that the task abort function - * was sucessful. + * was successful. */ spin_lock_irqsave(&task->task_state_lock, flags); task->task_state_flags |= SAS_TASK_STATE_DONE; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 411ed48d79da..be43b4f6bd70 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6470,7 +6470,7 @@ out_error: * we just use some constant number as place holder. * * Return codes - * 0 - sucessful + * 0 - successful * -ENOMEM - No availble memory * -EIO - The mailbox failed to complete successfully. **/ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index b4720a109817..7983abbb7aaf 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4739,7 +4739,7 @@ lpfc_sli4_read_rev(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, * is attached to. * * Return codes - * 0 - sucessful + * 0 - successful * otherwise - failed to retrieve physical port name **/ static int @@ -15118,7 +15118,7 @@ lpfc_check_next_fcf_pri_level(struct lpfc_hba *phba) /* * if next_fcf_pri was not set above and the list is not empty then * we have failed flogis on all of them. So reset flogi failed - * and start at the begining. + * and start at the beginning. */ if (!next_fcf_pri && !list_empty(&phba->fcf.fcf_pri_list)) { list_for_each_entry(fcf_pri, &phba->fcf.fcf_pri_list, list) { diff --git a/drivers/video/exynos/exynos_mipi_dsi.c b/drivers/video/exynos/exynos_mipi_dsi.c index 9908e75ae761..752a12e625e4 100644 --- a/drivers/video/exynos/exynos_mipi_dsi.c +++ b/drivers/video/exynos/exynos_mipi_dsi.c @@ -461,7 +461,7 @@ static int exynos_mipi_dsi_probe(struct platform_device *pdev) done: platform_set_drvdata(pdev, dsim); - dev_dbg(&pdev->dev, "%s() completed sucessfuly (%s mode)\n", __func__, + dev_dbg(&pdev->dev, "%s() completed successfully (%s mode)\n", __func__, dsim_config->e_interface == DSIM_COMMAND ? "CPU" : "RGB"); return 0; diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c index a0c8965c1a79..649eb0b2d3ff 100644 --- a/drivers/w1/masters/ds1wm.c +++ b/drivers/w1/masters/ds1wm.c @@ -347,7 +347,7 @@ static void ds1wm_search(void *data, struct w1_master *master_dev, "pass: %d entering ASM\n", pass); ds1wm_write_register(ds1wm_data, DS1WM_CMD, DS1WM_CMD_SRA); dev_dbg(&ds1wm_data->pdev->dev, - "pass: %d begining nibble loop\n", pass); + "pass: %d beginning nibble loop\n", pass); r_prime = 0; d = 0; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 1c3613998862..905166a3ffb2 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode) /** * ext2_free_blocks() -- Free given blocks and update quota and i_blocks * @inode: inode - * @block: start physcial block to free + * @block: start physical block to free * @count: number of blocks to free */ void ext2_free_blocks (struct inode * inode, unsigned long block, diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 25cd60892116..7c6a31d44daf 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode) * ext3_free_blocks_sb() -- Free given blocks and update quota * @handle: handle to this transaction * @sb: super block - * @block: start physcial block to free + * @block: start physical block to free * @count: number of blocks to free * @pdquot_freed_blocks: pointer to quota */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9a4a5c48b1c9..3aff616d4d03 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3196,7 +3196,7 @@ out_brelse: * * - Within generic_file_write() for O_SYNC files. * Here, there will be no transaction running. We wait for any running - * trasnaction to commit. + * transaction to commit. * * - Within sys_sync(), kupdate and such. * We wait on commit, if tol to. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 02bc8cbe7281..189dae6b0964 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3227,7 +3227,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, * handle: The journal handle * inode: The files inode * page: A locked page that contains the offset "from" - * from: The starting byte offset (from the begining of the file) + * from: The starting byte offset (from the beginning of the file) * to begin discarding * len: The length of bytes to discard * flags: Optional flags that may be used: @@ -3235,11 +3235,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle, * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED * Only zero the regions of the page whose buffer heads * have already been unmapped. This flag is appropriate - * for updateing the contents of a page whose blocks may + * for updating the contents of a page whose blocks may * have already been released, and we only want to zero * out the regions that correspond to those released blocks. * - * Returns zero on sucess or negative on failure. + * Returns zero on success or negative on failure. */ static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, struct inode *inode, struct page *page, loff_t from, @@ -3400,7 +3400,7 @@ int ext4_can_truncate(struct inode *inode) * @offset: The offset where the hole will begin * @len: The length of the hole * - * Returns: 0 on sucess or negative on failure + * Returns: 0 on success or negative on failure */ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) @@ -3922,7 +3922,7 @@ static int ext4_inode_blocks_set(handle_t *handle, if (i_blocks <= ~0U) { /* - * i_blocks can be represnted in a 32 bit variable + * i_blocks can be represented in a 32 bit variable * as multiple of 512 bytes */ raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); @@ -4083,7 +4083,7 @@ out_brelse: * * - Within generic_file_write() for O_SYNC files. * Here, there will be no transaction running. We wait for any running - * trasnaction to commit. + * transaction to commit. * * - Within sys_sync(), kupdate and such. * We wait on commit, if tol to. @@ -4327,7 +4327,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) * worse case, the indexs blocks spread over different block groups * * If datablocks are discontiguous, they are possible to spread over - * different block groups too. If they are contiuguous, with flexbg, + * different block groups too. If they are contiguous, with flexbg, * they could still across block group boundary. * * Also account for superblock, inode, quota and xattr blocks diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1cd6994fc446..86831411e98b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4705,7 +4705,7 @@ error_return: * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block - * @block: start physcial block to add to the block group + * @block: start physical block to add to the block group * @count: number of blocks to free * * This marks the blocks as free in the bitmap and buddy. diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index b0b4eb24d592..1905ca8dd399 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -1,5 +1,5 @@ /* - * Trace files that want to automate creationg of all tracepoints defined + * Trace files that want to automate creation of all tracepoints defined * in their file should include this file. The following are macros that the * trace file may define: * diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index c718fd3664b6..4de77ea5fa37 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -105,7 +105,7 @@ static const struct file_operations vlandev_fops = { }; /* - * Proc filesystem derectory entries. + * Proc filesystem directory entries. */ /* Strings */ -- cgit v1.2.3 From 381bf7cad9dbce701c618f8942fd35954952ef39 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 28 Aug 2012 10:38:03 +0200 Subject: fuse: mark variables uninitialized gcc 4.6.3 complains about uninitialized variables in fs/fuse/control.c: CC fs/fuse/control.o fs/fuse/control.c: In function 'fuse_conn_congestion_threshold_write': fs/fuse/control.c:165:29: warning: 'val' may be used uninitialized in this function [-Wuninitialized] fs/fuse/control.c: In function 'fuse_conn_max_background_write': fs/fuse/control.c:128:23: warning: 'val' may be used uninitialized in this function [-Wuninitialized] fuse_conn_limit_write() will always return non-zero unless the &val is modified, so the warning is misleading. Let the compiler know about it by marking 'val' with 'uninitialized_var'. Signed-off-by: Daniel Mack Cc: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 03ff5b1eba93..75a20c092dd4 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -117,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned val; + unsigned uninitialized_var(val); ssize_t ret; ret = fuse_conn_limit_write(file, buf, count, ppos, &val, @@ -154,7 +154,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned val; + unsigned uninitialized_var(val); ssize_t ret; ret = fuse_conn_limit_write(file, buf, count, ppos, &val, -- cgit v1.2.3 From 156bddd8e505b295540f3ca0e27dda68cb0d49aa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Sep 2012 16:50:42 +0200 Subject: ext3: Fix fdatasync() for files with only i_size changes Code tracking when transaction needs to be committed on fdatasync(2) forgets to handle a situation when only inode's i_size is changed. Thus in such situations fdatasync(2) doesn't force transaction with new i_size to disk and that can result in wrong i_size after a crash. Fix the issue by updating inode's i_datasync_tid whenever its size is updated. CC: # >= 2.6.32 Reported-by: Kristian Nielsen Signed-off-by: Jan Kara --- fs/ext3/inode.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a07597307fd1..ff574b4e345e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3072,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; + __le32 disksize; uid_t i_uid; gid_t i_gid; @@ -3113,7 +3115,11 @@ again: raw_inode->i_gid_high = 0; } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); + disksize = cpu_to_le32(ei->i_disksize); + if (disksize != raw_inode->i_size) { + need_datasync = 1; + raw_inode->i_size = disksize; + } raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); @@ -3129,8 +3135,11 @@ again: if (!S_ISREG(inode->i_mode)) { raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); } else { - raw_inode->i_size_high = - cpu_to_le32(ei->i_disksize >> 32); + disksize = cpu_to_le32(ei->i_disksize >> 32); + if (disksize != raw_inode->i_size_high) { + raw_inode->i_size_high = disksize; + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, @@ -3183,6 +3192,8 @@ again: ext3_clear_inode_state(inode, EXT3_STATE_NEW); atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); + if (need_datasync) + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); out_brelse: brelse (bh); ext3_std_error(inode->i_sb, err); -- cgit v1.2.3 From da02eb72f150bef1d281d93b3e4716ce374c4510 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Thu, 23 Aug 2012 17:28:57 +0200 Subject: reiserfs: Make reiserfs_xattr_handlers static Silences the following sparse warning: fs/reiserfs/xattr.c:899:28: warning: symbol 'reiserfs_xattr_handlers' was not declared. Should it be static? Signed-off-by: Sachin Kamat Signed-off-by: Jan Kara --- fs/reiserfs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d319963aeb11..c196369fe408 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -896,7 +896,7 @@ static int create_privroot(struct dentry *dentry) { return 0; } #endif /* Actual operations that are exported to VFS-land */ -const struct xattr_handler *reiserfs_xattr_handlers[] = { +static const struct xattr_handler *reiserfs_xattr_handlers[] = { #ifdef CONFIG_REISERFS_FS_XATTR &reiserfs_xattr_user_handler, &reiserfs_xattr_trusted_handler, -- cgit v1.2.3 From e4230108547766f4e105a62c45f3724309106f91 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Aug 2012 14:30:40 -0500 Subject: ext3: don't clear orphan list on ro mount with errors When we have a filesystem with an orphan inode list *and* in error state, things behave differently if: 1) e2fsck -p is done prior to mount: e2fsck fixes things and exits happily (barring other significant problems) vs. 2) mount is done first, then e2fsck -p: due to the orphan inode list removal, more errors are found and e2fsck exits with UNEXPECTED INCONSISTENCY. The 2nd case above, on the root filesystem, has the tendency to halt the boot process, which is unfortunate. The situation can be improved by not clearing the orphan inode list when the fs is mounted readonly. Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c892e93d8e7..d90ad0574705 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1479,10 +1479,12 @@ static void ext3_orphan_cleanup (struct super_block * sb, } if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { - if (es->s_last_orphan) + /* don't clear list on RO mount w/ errors */ + if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { jbd_debug(1, "Errors on filesystem, " "clearing orphan list.\n"); - es->s_last_orphan = 0; + es->s_last_orphan = 0; + } jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); return; } -- cgit v1.2.3 From 378b8e1ad18e7c97832aa3771e295153c4cd2a55 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 31 Aug 2012 12:49:07 -0400 Subject: udf: add writepages support for udf Use mpage_writepages() instead of multiple calls to udf_writepage() to make performance higher. *Write Speed with writepage() = RecSize ReadSpeed WriteSpeed RanReadSpeed RanWriteSpeed 10485760 0.00MB/sec 8.56MB/sec 0.00MB/sec 8.20MB/sec 1048576 0.00MB/sec 8.57MB/sec 0.00MB/sec 6.42MB/sec 524288 0.00MB/sec 8.59MB/sec 0.00MB/sec 5.24MB/sec 262144 0.00MB/sec 8.59MB/sec 0.00MB/sec 4.17MB/sec 131072 0.00MB/sec 8.53MB/sec 0.00MB/sec 3.32MB/sec 65536 0.00MB/sec 8.49MB/sec 0.00MB/sec 2.31MB/sec *Write Speed with writepages() RecSize ReadSpeed WriteSpeed RanReadSpeed RanWriteSpeed 10485760 0.00MB/sec 9.88MB/sec 0.00MB/sec 9.60MB/sec 1048576 0.00MB/sec 9.95MB/sec 0.00MB/sec 7.52MB/sec 524288 0.00MB/sec 9.98MB/sec 0.00MB/sec 6.16MB/sec 262144 0.00MB/sec 9.90MB/sec 0.00MB/sec 4.98MB/sec 131072 0.00MB/sec 9.89MB/sec 0.00MB/sec 3.78MB/sec 65536 0.00MB/sec 9.81MB/sec 0.00MB/sec 2.50MB/sec There is about 1.4MB/sec speed improvement over 8.5MB/sec, which comes out around 16% improvement. Signed-off-by: Namjae Jeon Signed-off-by: Ashish Sangwan Signed-off-by: Jan Kara --- fs/udf/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/udf/inode.c b/fs/udf/inode.c index aa233469b3c1..1a0588e77a2f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -100,6 +100,12 @@ static int udf_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, udf_get_block, wbc); } +static int udf_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, udf_get_block); +} + static int udf_readpage(struct file *file, struct page *page) { return mpage_readpage(page, udf_get_block); @@ -145,6 +151,7 @@ const struct address_space_operations udf_aops = { .readpage = udf_readpage, .readpages = udf_readpages, .writepage = udf_writepage, + .writepages = udf_writepages, .write_begin = udf_write_begin, .write_end = generic_write_end, .bmap = udf_bmap, -- cgit v1.2.3 From 76f59b3bf5af41c2fb60f05f5d5ec51e8138e996 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 4 Sep 2012 13:42:00 +0530 Subject: ext3: Replace 0 with NULL for pointer in super.c file Fixes the following sparse warning: fs/ext3/super.c:983:45: warning: Using plain integer as NULL pointer Signed-off-by: Sachin Kamat Signed-off-by: Jan Kara --- fs/ext3/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index d90ad0574705..504fb3f51420 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -975,7 +975,7 @@ static int parse_options (char *options, struct super_block *sb, * Initialize args struct so we know whether arg was * found; some options take optional arguments. */ - args[0].to = args[0].from = 0; + args[0].to = args[0].from = NULL; token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: -- cgit v1.2.3 From c9e67d483776d8d2a5f3f70491161b205930ffe1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 4 Sep 2012 18:45:54 +0200 Subject: fuse: fix retrieve length In some cases fuse_retrieve() would return a short byte count if offset was non-zero. The data returned was correct, though. Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org --- fs/fuse/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 7df2b5e8fbe1..f4246cfc8d87 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1576,6 +1576,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->pages[req->num_pages] = page; req->num_pages++; + offset = 0; num -= this_num; total_len += this_num; index++; -- cgit v1.2.3 From c3f52af3e03013db5237e339c817beaae5ec9e3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2012 14:56:02 -0400 Subject: NFS: Fix the initialisation of the readdir 'cookieverf' array When the NFS_COOKIEVERF helper macro was converted into a static inline function in commit 99fadcd764 (nfs: convert NFS_*(inode) helpers to static inline), we broke the initialisation of the readdir cookies, since that depended on doing a memset with an argument of 'sizeof(NFS_COOKIEVERF(inode))' which therefore changed from sizeof(be32 cookieverf[2]) to sizeof(be32 *). At this point, NFS_COOKIEVERF seems to be more of an obfuscation than a helper, so the best thing would be to just get rid of it. Also see: https://bugzilla.kernel.org/show_bug.cgi?id=46881 Reported-by: Andi Kleen Reported-by: David Binderman Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 4 ++-- include/linux/nfs_fs.h | 5 ----- 4 files changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c6e895f0fbf3..9b47610338f5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d6b3b5f2d779..69322096c325 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; - __be32 *verf = NFS_COOKIEVERF(dir); + __be32 *verf = NFS_I(dir)->cookieverf; struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 635274140b18..86b4c7361036 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1f8fc7f9bcd8..4b03f56e280e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -265,11 +265,6 @@ static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode) return NFS_SERVER(inode)->nfs_client->rpc_ops; } -static inline __be32 *NFS_COOKIEVERF(const struct inode *inode) -{ - return NFS_I(inode)->cookieverf; -} - static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode) { struct nfs_server *nfss = NFS_SERVER(inode); -- cgit v1.2.3 From 872ece86ea5c367aa92f44689c2d01a1c767aeb3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Sep 2012 11:05:07 -0400 Subject: NFS: Fix a problem with the legacy binary mount code Apparently, am-utils is still using the legacy binary mountdata interface, and is having trouble parsing /proc/mounts due to the 'port=' field being incorrectly set. The following patch should fix up the regression. Reported-by: Marius Tolzmann Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 239aff7338eb..b8eda700584b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); + args->nfs_server.port = ntohs(data->addr.sin_port); if (!nfs_verify_server_address(sap)) goto out_no_address; @@ -2564,6 +2565,7 @@ static int nfs4_validate_mount_data(void *options, return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; + args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { if (data->auth_flavourlen > 1) -- cgit v1.2.3 From 21f498c2f73bd6150d82931f09965826dca0b5f2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Aug 2012 10:59:25 -0400 Subject: NFSv4: Fix range checking in __nfs4_get_acl_uncached and __nfs4_proc_set_acl Ensure that the user supplied buffer size doesn't cause us to overflow the 'pages' array. Also fix up some confusion between the use of PAGE_SIZE and PAGE_CACHE_SIZE when calculating buffer sizes. We're not using the page cache for anything here. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 86b4c7361036..6b94f2d52532 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); } -/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that - * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on * the stack. */ -#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) +#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) @@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, spages = pages; do { - len = min_t(size_t, PAGE_CACHE_SIZE, buflen); + len = min_t(size_t, PAGE_SIZE, buflen); newpage = alloc_page(GFP_KERNEL); if (newpage == NULL) @@ -3782,17 +3782,16 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - int ret = -ENOMEM, npages, i; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + int ret = -ENOMEM, i; size_t acl_len = 0; - npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* As long as we're doing a round trip to the server anyway, * let's be prepared for a page of acl data. */ if (npages == 0) npages = 1; - - /* Add an extra page to handle the bitmap returned */ - npages++; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -3891,10 +3890,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl .rpc_argp = &arg, .rpc_resp = &res, }; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); if (i < 0) return i; -- cgit v1.2.3 From 6f1cbd4a25c58323b57f1374e827c363b44683cb Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 4 Sep 2012 07:23:35 -0400 Subject: sysfs: Fix comment typo "sysf_create_link". More pedantry. Signed-off-by: Robert P. J. Day Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index a7ac78f8e67a..3c9eb5624f5e 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -113,7 +113,7 @@ int sysfs_create_link(struct kobject *kobj, struct kobject *target, * @target: object we're pointing to. * @name: name of the symlink. * - * This function does the same as sysf_create_link(), but it + * This function does the same as sysfs_create_link(), but it * doesn't warn if the link already exists. */ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, -- cgit v1.2.3 From 03c1c29053f678234dbd51bf3d65f3b7529021de Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:21:50 -0400 Subject: ext4: ignore last group w/o enough space when resizing instead of BUG'ing If the last group does not have enough space for group tables, ignore it instead of calling BUG_ON(). Reported-by: Daniel Drake Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 41f6ef68e2e1..28031c4e15e9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -200,8 +200,11 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) * be a partial of a flex group. * * @sb: super block of fs to which the groups belongs + * + * Returns 0 on a successful allocation of the metadata blocks in the + * block group. */ -static void ext4_alloc_group_tables(struct super_block *sb, +static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, int flexbg_size) { @@ -226,6 +229,8 @@ static void ext4_alloc_group_tables(struct super_block *sb, (last_group & ~(flexbg_size - 1)))); next_group: group = group_data[0].group; + if (src_group >= group_data[0].group + flex_gd->count) + return -ENOSPC; start_blk = ext4_group_first_block_no(sb, src_group); last_blk = start_blk + group_data[src_group - group].blocks_count; @@ -235,7 +240,6 @@ next_group: start_blk += overhead; - BUG_ON(src_group >= group_data[0].group + flex_gd->count); /* We collect contiguous blocks as much as possible. */ src_group++; for (; src_group <= last_group; src_group++) @@ -300,6 +304,7 @@ next_group: group_data[i].free_blocks_count); } } + return 0; } static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, @@ -1729,7 +1734,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { - ext4_alloc_group_tables(sb, flex_gd, flexbg_size); + if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) + break; err = ext4_flex_group_add(sb, resize_inode, flex_gd); if (unlikely(err)) break; -- cgit v1.2.3 From d7574ad08bac1ef89cb679d2c76c91ff9281c2e2 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:23:50 -0400 Subject: ext4: report the original old blocks count in a debug message when resizing Avoid changing o_blocks_count, since it is used later when reporting old blocks count in debug mode. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 28031c4e15e9..591f4bda1146 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1719,8 +1719,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) es->s_log_groups_per_flex) flexbg_size = 1 << es->s_log_groups_per_flex; - o_blocks_count = ext4_blocks_count(es); - if (o_blocks_count == n_blocks_count) + if (ext4_blocks_count(es) == n_blocks_count) goto out; flex_gd = alloc_flex_gd(flexbg_size); -- cgit v1.2.3 From 6df935ad2fced9033ab52078825fcaf6365f34b7 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:25:50 -0400 Subject: ext4: don't copy non-existent gdt blocks when resizing The resize code was copying blocks at the beginning of each block group in order to copy the superblock and block group descriptor table (gdt) blocks. This was, unfortunately, being done even for block groups that did not have super blocks or gdt blocks. This is a complete waste of perfectly good I/O bandwidth, to skip writing those blocks for sparse bg's. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 591f4bda1146..a0ee26c23dd8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -456,6 +456,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb, gdblocks = ext4_bg_num_gdb(sb, group); start = ext4_group_first_block_no(sb, group); + if (!ext4_bg_has_super(sb, group)) + goto handle_itb; + /* Copy all of the GDT blocks into the backup in this group */ for (j = 0, block = start + 1; j < gdblocks; j++, block++) { struct buffer_head *gdb; @@ -498,6 +501,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, goto out; } +handle_itb: /* Initialize group tables of the grop @group */ if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) goto handle_bb; -- cgit v1.2.3 From 2ebd1704ded88a8ae29b5f3998b13959c715c4be Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:27:50 -0400 Subject: ext4: avoid duplicate writes of the backup bg descriptor blocks The resize code was needlessly writing the backup block group descriptor blocks multiple times (once per block group) during an online resize. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index a0ee26c23dd8..365d800ff8c1 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1358,13 +1358,15 @@ exit_journal: err = err2; if (!err) { - int i; + int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + int gdb_num_end = ((group + flex_gd->count - 1) / + EXT4_DESC_PER_BLOCK(sb)); + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block)); - for (i = 0; i < flex_gd->count; i++, group++) { + for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - int gdb_num; - gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); + gdb_bh = sbi->s_group_desc[gdb_num]; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, gdb_bh->b_size); -- cgit v1.2.3 From 117fff10d7f140e12dd43df20d3f9fda80577460 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Sep 2012 01:29:50 -0400 Subject: ext4: grow the s_flex_groups array as needed when resizing Previously, we allocated the s_flex_groups array to the maximum size that the file system could be resized. There was two problems with this approach. First, it wasted memory in the common case where the file system was not resized. Secondly, once we start allowing online resizing using the meta_bg scheme, there is no maximum size that the file system can be resized. So instead, we need to grow the s_flex_groups at inline resize time. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/resize.c | 14 +++++++++----- fs/ext4/super.c | 48 +++++++++++++++++++++++++++++++++++------------- 3 files changed, 47 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0df5ee102b61..464cff711ed6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1276,6 +1276,7 @@ struct ext4_sb_info { unsigned int s_log_groups_per_flex; struct flex_groups *s_flex_groups; + ext4_group_t s_flex_groups_allocated; /* workqueue for dio unwritten */ struct workqueue_struct *dio_unwritten_wq; @@ -2055,6 +2056,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb, extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern void ext4_kvfree(void *ptr); +extern int ext4_alloc_flex_bg_array(struct super_block *sb, + ext4_group_t ngroup); extern __printf(4, 5) void __ext4_error(struct super_block *, const char *, unsigned int, const char *, ...); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 365d800ff8c1..3f5c67bf13a2 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1503,6 +1503,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (err) goto out; + err = ext4_alloc_flex_bg_array(sb, input->group + 1); + if (err) + return err; + flex_gd.count = 1; flex_gd.groups = input; flex_gd.bg_flags = &bg_flags; @@ -1662,7 +1666,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) unsigned long n_desc_blocks; unsigned long o_desc_blocks; unsigned long desc_blocks; - int err = 0, flexbg_size = 1; + int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; o_blocks_count = ext4_blocks_count(es); @@ -1721,13 +1725,13 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) goto out; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && - es->s_log_groups_per_flex) - flexbg_size = 1 << es->s_log_groups_per_flex; - if (ext4_blocks_count(es) == n_blocks_count) goto out; + err = ext4_alloc_flex_bg_array(sb, n_group + 1); + if (err) + return err; + flex_gd = alloc_flex_gd(flexbg_size); if (flex_gd == NULL) { err = -ENOMEM; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b875ff555865..b8de488889d6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1925,15 +1925,45 @@ done: return res; } +int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct flex_groups *new_groups; + int size; + + if (!sbi->s_log_groups_per_flex) + return 0; + + size = ext4_flex_group(sbi, ngroup - 1) + 1; + if (size <= sbi->s_flex_groups_allocated) + return 0; + + size = roundup_pow_of_two(size * sizeof(struct flex_groups)); + new_groups = ext4_kvzalloc(size, GFP_KERNEL); + if (!new_groups) { + ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", + size / (int) sizeof(struct flex_groups)); + return -ENOMEM; + } + + if (sbi->s_flex_groups) { + memcpy(new_groups, sbi->s_flex_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups))); + ext4_kvfree(sbi->s_flex_groups); + } + sbi->s_flex_groups = new_groups; + sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + return 0; +} + static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; - ext4_group_t flex_group_count; ext4_group_t flex_group; unsigned int groups_per_flex = 0; - size_t size; - int i; + int i, err; sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { @@ -1942,17 +1972,9 @@ static int ext4_fill_flex_info(struct super_block *sb) } groups_per_flex = 1 << sbi->s_log_groups_per_flex; - /* We allocate both existing and potentially added groups */ - flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + - ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << - EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; - size = flex_group_count * sizeof(struct flex_groups); - sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); - if (sbi->s_flex_groups == NULL) { - ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", - flex_group_count); + err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); + if (err) goto failed; - } for (i = 0; i < sbi->s_groups_count; i++) { gdp = ext4_get_group_desc(sb, i, NULL); -- cgit v1.2.3 From 28623c2f5b0dca3c3ea34fd6108940661352e276 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Sep 2012 01:31:50 -0400 Subject: ext4: grow the s_group_info array as needed Previously we allocated the s_group_info array with enough space for any future possible growth of the file system via online resize. This is unfortunate because it wastes memory, and it doesn't work for the meta_bg scheme, since there is no limit based on the number of reserved gdt blocks. So add the code to grow the s_group_info array as needed. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/mballoc.c | 79 +++++++++++++++++++++++++++---------------------------- fs/ext4/resize.c | 8 ++++++ 3 files changed, 50 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 464cff711ed6..8b6902c4d7be 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1233,6 +1233,7 @@ struct ext4_sb_info { spinlock_t s_md_lock; unsigned short *s_mb_offsets; unsigned int *s_mb_maxs; + unsigned int s_group_info_size; /* tunables */ unsigned long s_stripe; @@ -1971,6 +1972,8 @@ extern void ext4_exit_mballoc(void); extern void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t block, unsigned long count, int flags); +extern int ext4_mb_alloc_groupinfo(struct super_block *sb, + ext4_group_t ngroups); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6873571c9f44..2102c20f7e98 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -24,6 +24,7 @@ #include "ext4_jbd2.h" #include "mballoc.h" #include +#include #include #include @@ -2163,6 +2164,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) return cachep; } +/* + * Allocate the top-level s_group_info array for the specified number + * of groups + */ +int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned size; + struct ext4_group_info ***new_groupinfo; + + size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> + EXT4_DESC_PER_BLOCK_BITS(sb); + if (size <= sbi->s_group_info_size) + return 0; + + size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); + new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); + if (!new_groupinfo) { + ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); + return -ENOMEM; + } + if (sbi->s_group_info) { + memcpy(new_groupinfo, sbi->s_group_info, + sbi->s_group_info_size * sizeof(*sbi->s_group_info)); + ext4_kvfree(sbi->s_group_info); + } + sbi->s_group_info = new_groupinfo; + sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", + sbi->s_group_info_size); + return 0; +} + /* Create and initialize ext4_group_info data for the given group. */ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *desc) @@ -2252,49 +2286,14 @@ static int ext4_mb_init_backend(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - int num_meta_group_infos; - int num_meta_group_infos_max; - int array_size; + int err; struct ext4_group_desc *desc; struct kmem_cache *cachep; - /* This is the number of blocks used by GDT */ - num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); - - /* - * This is the total number of blocks used by GDT including - * the number of reserved blocks for GDT. - * The s_group_info array is allocated with this value - * to allow a clean online resize without a complex - * manipulation of pointer. - * The drawback is the unused memory when no resize - * occurs but it's very low in terms of pages - * (see comments below) - * Need to handle this properly when META_BG resizing is allowed - */ - num_meta_group_infos_max = num_meta_group_infos + - le16_to_cpu(es->s_reserved_gdt_blocks); + err = ext4_mb_alloc_groupinfo(sb, ngroups); + if (err) + return err; - /* - * array_size is the size of s_group_info array. We round it - * to the next power of two because this approximation is done - * internally by kmalloc so we can have some more memory - * for free here (e.g. may be used for META_BG resize). - */ - array_size = 1; - while (array_size < sizeof(*sbi->s_group_info) * - num_meta_group_infos_max) - array_size = array_size << 1; - /* An 8TB filesystem with 64-bit pointers requires a 4096 byte - * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. - * So a two level scheme suffices for now. */ - sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); - if (sbi->s_group_info == NULL) { - ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); - return -ENOMEM; - } sbi->s_buddy_cache = new_inode(sb); if (sbi->s_buddy_cache == NULL) { ext4_msg(sb, KERN_ERR, "can't get new inode"); @@ -2322,7 +2321,7 @@ err_freebuddy: cachep = get_groupinfo_cache(sb->s_blocksize_bits); while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); - i = num_meta_group_infos; + i = sbi->s_group_info_size; while (i-- > 0) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3f5c67bf13a2..f288933bf4c0 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1507,6 +1507,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (err) return err; + err = ext4_mb_alloc_groupinfo(sb, input->group + 1); + if (err) + goto out; + flex_gd.count = 1; flex_gd.groups = input; flex_gd.bg_flags = &bg_flags; @@ -1732,6 +1736,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) if (err) return err; + err = ext4_mb_alloc_groupinfo(sb, n_group + 1); + if (err) + goto out; + flex_gd = alloc_flex_gd(flexbg_size); if (flex_gd == NULL) { err = -ENOMEM; -- cgit v1.2.3 From 01f795f9e0d67adeccc61a8b20c28acb45fa5fd8 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 5 Sep 2012 01:33:50 -0400 Subject: ext4: add online resizing support for meta_bg and 64-bit file systems This patch adds support for resizing file systems with the meta_bg and 64bit features. [ Added a fix by tytso to fix a divide by zero when resizing a filesystem from 14 TB to 18TB. Also fixed overhead accounting for meta_bg file systems.] Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 15 ---- fs/ext4/resize.c | 215 ++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 165 insertions(+), 65 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7f7dad787603..8b84fe28ccaf 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -365,26 +365,11 @@ group_add_out: return -EOPNOTSUPP; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_META_BG)) { - ext4_msg(sb, KERN_ERR, - "Online resizing not (yet) supported with meta_bg"); - return -EOPNOTSUPP; - } - if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, sizeof(__u64))) { return -EFAULT; } - if (n_blocks_count > MAX_32_NUM && - !EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_64BIT)) { - ext4_msg(sb, KERN_ERR, - "File system only supports 32-bit block numbers"); - return -EOPNOTSUPP; - } - err = ext4_resize_begin(sb); if (err) return err; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f288933bf4c0..7adc08854588 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb) smp_mb__after_clear_bit(); } +static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, + ext4_group_t group) { + return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) << + EXT4_DESC_PER_BLOCK_BITS(sb); +} + +static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb, + ext4_group_t group) { + group = ext4_meta_bg_first_group(sb, group); + return ext4_group_first_block_no(sb, group); +} + +static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, + ext4_group_t group) { + ext4_grpblk_t overhead; + overhead = ext4_bg_num_gdb(sb, group); + if (ext4_bg_has_super(sb, group)) + overhead += 1 + + le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); + return overhead; +} + #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb, ext4_fsblk_t end = start + input->blocks_count; ext4_group_t group = input->group; ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; - unsigned overhead = ext4_bg_has_super(sb, group) ? - (1 + ext4_bg_num_gdb(sb, group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + unsigned overhead = ext4_group_overhead_blocks(sb, group); ext4_fsblk_t metaend = start + overhead; struct buffer_head *bh = NULL; ext4_grpblk_t free_blocks_count, offset; @@ -209,7 +229,6 @@ static int ext4_alloc_group_tables(struct super_block *sb, int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; ext4_fsblk_t start_blk; ext4_fsblk_t last_blk; ext4_group_t src_group; @@ -234,19 +253,19 @@ next_group: start_blk = ext4_group_first_block_no(sb, src_group); last_blk = start_blk + group_data[src_group - group].blocks_count; - overhead = ext4_bg_has_super(sb, src_group) ? - (1 + ext4_bg_num_gdb(sb, src_group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + overhead = ext4_group_overhead_blocks(sb, src_group); start_blk += overhead; /* We collect contiguous blocks as much as possible. */ src_group++; - for (; src_group <= last_group; src_group++) - if (!ext4_bg_has_super(sb, src_group)) + for (; src_group <= last_group; src_group++) { + overhead = ext4_group_overhead_blocks(sb, src_group); + if (overhead != 0) last_blk += group_data[src_group - group].blocks_count; else break; + } /* Allocate block bitmaps */ for (; bb_index < flex_gd->count; bb_index++) { @@ -438,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb, ext4_group_t group, count; struct buffer_head *bh = NULL; int reserved_gdb, i, j, err = 0, err2; + int meta_bg; BUG_ON(!flex_gd->count || !group_data || group_data[0].group != sbi->s_groups_count); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); /* This transaction may be extended/restarted along the way */ handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); @@ -452,15 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb, group = group_data[0].group; for (i = 0; i < flex_gd->count; i++, group++) { unsigned long gdblocks; + ext4_grpblk_t overhead; gdblocks = ext4_bg_num_gdb(sb, group); start = ext4_group_first_block_no(sb, group); - if (!ext4_bg_has_super(sb, group)) + if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) goto handle_itb; + if (meta_bg == 1) { + ext4_group_t first_group; + first_group = ext4_meta_bg_first_group(sb, group); + if (first_group != group + 1 && + first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) + goto handle_itb; + } + + block = start + ext4_bg_has_super(sb, group); /* Copy all of the GDT blocks into the backup in this group */ - for (j = 0, block = start + 1; j < gdblocks; j++, block++) { + for (j = 0; j < gdblocks; j++, block++) { struct buffer_head *gdb; ext4_debug("update backup group %#04llx\n", block); @@ -530,11 +561,11 @@ handle_bb: err = PTR_ERR(bh); goto out; } - if (ext4_bg_has_super(sb, group)) { + overhead = ext4_group_overhead_blocks(sb, group); + if (overhead != 0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + - 1); + ext4_set_bits(bh->b_data, 0, overhead); } ext4_mark_bitmap_end(group_data[i].blocks_count, sb->s_blocksize * 8, bh->b_data); @@ -830,6 +861,45 @@ exit_bh: return err; } +/* + * add_new_gdb_meta_bg is the sister of add_new_gdb. + */ +static int add_new_gdb_meta_bg(struct super_block *sb, + handle_t *handle, ext4_group_t group) { + ext4_fsblk_t gdblock; + struct buffer_head *gdb_bh; + struct buffer_head **o_group_desc, **n_group_desc; + unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + int err; + + gdblock = ext4_meta_bg_first_block_no(sb, group) + + ext4_bg_has_super(sb, group); + gdb_bh = sb_bread(sb, gdblock); + if (!gdb_bh) + return -EIO; + n_group_desc = ext4_kvmalloc((gdb_num + 1) * + sizeof(struct buffer_head *), + GFP_NOFS); + if (!n_group_desc) { + err = -ENOMEM; + ext4_warning(sb, "not enough memory for %lu groups", + gdb_num + 1); + return err; + } + + o_group_desc = EXT4_SB(sb)->s_group_desc; + memcpy(n_group_desc, o_group_desc, + EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + n_group_desc[gdb_num] = gdb_bh; + EXT4_SB(sb)->s_group_desc = n_group_desc; + EXT4_SB(sb)->s_gdb_count++; + ext4_kvfree(o_group_desc); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (unlikely(err)) + brelse(gdb_bh); + return err; +} + /* * Called when we are adding a new group which has a backup copy of each of * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. @@ -958,16 +1028,16 @@ exit_free: * do not copy the full number of backups at this time. The resize * which changed s_groups_count will backup again. */ -static void update_backups(struct super_block *sb, - int blk_off, char *data, int size) +static void update_backups(struct super_block *sb, int blk_off, char *data, + int size, int meta_bg) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const ext4_group_t last = sbi->s_groups_count; + ext4_group_t last; const int bpg = EXT4_BLOCKS_PER_GROUP(sb); unsigned three = 1; unsigned five = 5; unsigned seven = 7; - ext4_group_t group; + ext4_group_t group = 0; int rest = sb->s_blocksize - size; handle_t *handle; int err = 0, err2; @@ -981,8 +1051,17 @@ static void update_backups(struct super_block *sb, ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); - while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { + if (meta_bg == 0) { + group = ext4_list_backups(sb, &three, &five, &seven); + last = sbi->s_groups_count; + } else { + group = ext4_meta_bg_first_group(sb, group) + 1; + last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); + } + + while (group < sbi->s_groups_count) { struct buffer_head *bh; + ext4_fsblk_t backup_block; /* Out of journal space, and can't get more - abort - so sad */ if (ext4_handle_valid(handle) && @@ -991,13 +1070,20 @@ static void update_backups(struct super_block *sb, (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) break; - bh = sb_getblk(sb, group * bpg + blk_off); + if (meta_bg == 0) + backup_block = group * bpg + blk_off; + else + backup_block = (ext4_group_first_block_no(sb, group) + + ext4_bg_has_super(sb, group)); + + bh = sb_getblk(sb, backup_block); if (!bh) { err = -EIO; break; } - ext4_debug("update metadata backup %#04lx\n", - (unsigned long)bh->b_blocknr); + ext4_debug("update metadata backup %llu(+%llu)\n", + backup_block, backup_block - + ext4_group_first_block_no(sb, group)); if ((err = ext4_journal_get_write_access(handle, bh))) break; lock_buffer(bh); @@ -1010,6 +1096,13 @@ static void update_backups(struct super_block *sb, if (unlikely(err)) ext4_std_error(sb, err); brelse(bh); + + if (meta_bg == 0) + group = ext4_list_backups(sb, &three, &five, &seven); + else if (group == last) + break; + else + group = last; } if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -1052,7 +1145,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, struct ext4_super_block *es = sbi->s_es; struct buffer_head *gdb_bh; int i, gdb_off, gdb_num, err = 0; + int meta_bg; + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; @@ -1072,8 +1167,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) err = reserve_backup_gdb(handle, resize_inode, group); - } else + } else if (meta_bg != 0) { + err = add_new_gdb_meta_bg(sb, handle, group); + } else { err = add_new_gdb(handle, resize_inode, group); + } if (err) break; } @@ -1225,7 +1323,7 @@ static void ext4_update_super(struct super_block *sb, } reserved_blocks = ext4_r_blocks_count(es) * 100; - do_div(reserved_blocks, ext4_blocks_count(es)); + reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es)); reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); @@ -1236,6 +1334,7 @@ static void ext4_update_super(struct super_block *sb, le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + ext4_debug("free blocks count %llu", ext4_free_blocks_count(es)); /* * We need to protect s_groups_count against other CPUs seeing * inconsistent state in the superblock. @@ -1270,6 +1369,8 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + ext4_debug("free blocks count %llu", + percpu_counter_read(&sbi->s_freeclusters_counter)); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && sbi->s_log_groups_per_flex) { @@ -1361,15 +1462,17 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); + int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_META_BG); update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); + sizeof(struct ext4_super_block), 0); for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; gdb_bh = sbi->s_group_desc[gdb_num]; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, - gdb_bh->b_size); + gdb_bh->b_size, meta_bg); } } exit: @@ -1413,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, group_data[i].group = group + i; group_data[i].blocks_count = blocks_per_group; - overhead = ext4_bg_has_super(sb, group + i) ? - (1 + ext4_bg_num_gdb(sb, group + i) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + overhead = ext4_group_overhead_blocks(sb, group + i); group_data[i].free_blocks_count = blocks_per_group - overhead; if (ext4_has_group_desc_csum(sb)) flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | @@ -1563,11 +1664,13 @@ errout: err = err2; if (!err) { + ext4_fsblk_t first_block; + first_block = ext4_group_first_block_no(sb, 0); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, + (char *)es, sizeof(struct ext4_super_block), 0); } return err; } @@ -1662,15 +1765,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *bh; - struct inode *resize_inode; - ext4_fsblk_t o_blocks_count; - ext4_group_t o_group; - ext4_group_t n_group; - ext4_grpblk_t offset, add; + struct inode *resize_inode = NULL; + ext4_grpblk_t add, offset; unsigned long n_desc_blocks; unsigned long o_desc_blocks; unsigned long desc_blocks; + ext4_group_t o_group; + ext4_group_t n_group; + ext4_fsblk_t o_blocks_count; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; + int meta_bg; o_blocks_count = ext4_blocks_count(es); @@ -1692,22 +1796,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / - EXT4_DESC_PER_BLOCK(sb); + EXT4_DESC_PER_BLOCK(sb); o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); + EXT4_DESC_PER_BLOCK(sb); desc_blocks = n_desc_blocks - o_desc_blocks; - if (desc_blocks && - (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || - le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { - ext4_warning(sb, "No reserved GDT blocks, can't resize"); - return -EPERM; - } + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); - if (IS_ERR(resize_inode)) { - ext4_warning(sb, "Error opening resize inode"); - return PTR_ERR(resize_inode); + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (meta_bg) { + ext4_error(sb, "resize_inode and meta_bg enabled " + "simultaneously"); + return -EINVAL; + } + if (le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks) { + ext4_warning(sb, + "No reserved GDT blocks, can't resize"); + return -EPERM; + } + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + if (IS_ERR(resize_inode)) { + ext4_warning(sb, "Error opening resize inode"); + return PTR_ERR(resize_inode); + } + } else if (!meta_bg) { + ext4_warning(sb, "File system features do not permit " + "online resize"); + return -EPERM; } /* See if the device is actually as big as what was requested */ @@ -1761,8 +1876,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) out: if (flex_gd) free_flex_gd(flex_gd); - - iput(resize_inode); + if (resize_inode != NULL) + iput(resize_inode); if (test_opt(sb, DEBUG)) ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " "upto %llu blocks", o_blocks_count, n_blocks_count); -- cgit v1.2.3 From caaa357dc0582f0d4504c22f1ef2347ad940de1b Mon Sep 17 00:00:00 2001 From: Michael Schutte Date: Tue, 4 Sep 2012 22:54:16 -0700 Subject: Input: Add KD[GS]KBDIACRUC ioctls to the compatible list Allow handling of Unicode compose sequences by 32-bit apps on a 64-bit system. The issue has been reported in and . A formal check of the two affected ioctls in drivers/char/vt_ioctl.c (introduced in 04c71976) and a test using x86 kbd 1.15.1 on a so patched x86_64 kernel both confirm that KD[GS]KBDIACRUC are ioctl32() compatible. Signed-off-by: Michael Schutte Signed-off-by: Dmitry Torokhov --- fs/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809..edd4ab67cd1b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -897,6 +897,8 @@ COMPATIBLE_IOCTL(KDGKBSENT) COMPATIBLE_IOCTL(KDSKBSENT) COMPATIBLE_IOCTL(KDGKBDIACR) COMPATIBLE_IOCTL(KDSKBDIACR) +COMPATIBLE_IOCTL(KDGKBDIACRUC) +COMPATIBLE_IOCTL(KDSKBDIACRUC) COMPATIBLE_IOCTL(KDKBDREP) COMPATIBLE_IOCTL(KDGKBLED) COMPATIBLE_IOCTL(KDGETLED) -- cgit v1.2.3 From ca1868309be96eb905a71174187a190543a1b90e Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Wed, 5 Sep 2012 16:31:29 +0800 Subject: vfs: fix kerneldoc for generic_fh_to_parent() Wrong function name in the kerneldoc description of generic_fh_to_parent(). Signed-off-by: Yanchuan Nian Signed-off-by: Jiri Kosina --- fs/libfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index a74cb1725ac6..7cc37ca19cd8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, EXPORT_SYMBOL_GPL(generic_fh_to_dentry); /** - * generic_fh_to_dentry - generic helper for the fh_to_parent export operation + * generic_fh_to_parent - generic helper for the fh_to_parent export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert * @fh_len: length of the file handle in bytes -- cgit v1.2.3 From 9c2fc0de1a6e638fe58c354a463f544f42a90a09 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 5 Sep 2012 15:48:23 +0200 Subject: udf: Fix data corruption for files in ICB When a file is stored in ICB (inode), we overwrite part of the file, and the page containing file's data is not in page cache, we end up corrupting file's data by overwriting them with zeros. The problem is we use simple_write_begin() which simply zeroes parts of the page which are not written to. The problem has been introduced by be021ee4 (udf: convert to new aops). Fix the problem by providing a ->write_begin function which makes the page properly uptodate. CC: # >= 2.6.24 Reported-by: Ian Abbott Signed-off-by: Jan Kara --- fs/udf/file.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/udf/file.c b/fs/udf/file.c index 7f3f7ba3df6e..d1c6093fd3d3 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -39,20 +39,24 @@ #include "udf_i.h" #include "udf_sb.h" -static int udf_adinicb_readpage(struct file *file, struct page *page) +static void __udf_adinicb_readpage(struct page *page) { struct inode *inode = page->mapping->host; char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - BUG_ON(!PageLocked(page)); - kaddr = kmap(page); - memset(kaddr, 0, PAGE_CACHE_SIZE); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); + memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); +} + +static int udf_adinicb_readpage(struct file *file, struct page *page) +{ + BUG_ON(!PageLocked(page)); + __udf_adinicb_readpage(page); unlock_page(page); return 0; @@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page, return 0; } +static int udf_adinicb_write_begin(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned len, unsigned flags, struct page **pagep, + void **fsdata) +{ + struct page *page; + + if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) + return -EIO; + page = grab_cache_page_write_begin(mapping, 0, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) + __udf_adinicb_readpage(page); + return 0; +} + static int udf_adinicb_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file, const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, - .write_begin = simple_write_begin, - .write_end = udf_adinicb_write_end, + .write_begin = udf_adinicb_write_begin, + .write_end = udf_adinicb_write_end, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, -- cgit v1.2.3 From 527a1361387cd132a577dc8a6ae676cfc1e8414b Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Thu, 6 Sep 2012 13:33:37 +0800 Subject: btrfs: fix trivial typo for the comment of BTRFS_FREE_INO_OBJECTID It should be storing, not sotring. Signed-off-by: Wang Sheng-Hui Signed-off-by: Jiri Kosina --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fa5c45b39075..11d0e09e1616 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -113,7 +113,7 @@ struct btrfs_ordered_sum; #define BTRFS_FREE_SPACE_OBJECTID -11ULL /* - * The inode number assigned to the special inode for sotring + * The inode number assigned to the special inode for storing * free ino cache */ #define BTRFS_FREE_INO_OBJECTID -12ULL -- cgit v1.2.3 From 5eec54fcde7e065eb3d8a6e70e61d90673ca706b Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 5 Sep 2012 17:44:31 +0100 Subject: UDF: Add support for O_DIRECT Add support for the O_DIRECT flag. There are two cases to deal with: 1. Small files stored in the ICB (inode control block?): just return 0 from the new udf_adinicb_direct_IO() handler to fall back to buffered I/O. 2. Larger files, not stored in the ICB: nothing special here. Just call blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up any blocks instantiated outside i_size on error. This is pretty standard. Factor error handling code out of udf_write_begin() into new function udf_write_failed() so it can also be called by udf_direct_IO(). Also change the whitespace in udf_aops to make it a bit neater. Signed-off-by: Ian Abbott Signed-off-by: Jan Kara --- arch/um/os-Linux/time.c | 2 +- fs/udf/file.c | 9 +++++++++ fs/udf/inode.c | 52 ++++++++++++++++++++++++++++++++++--------------- 3 files changed, 46 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index f60238559af3..0748fe0c8a73 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -114,7 +114,7 @@ static void deliver_alarm(void) skew += this_tick - last_tick; while (skew >= one_tick) { - alarm_handler(SIGVTALRM, NULL); + alarm_handler(SIGVTALRM, NULL, NULL); skew -= one_tick; } diff --git a/fs/udf/file.c b/fs/udf/file.c index d1c6093fd3d3..77b5953eaac8 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -118,11 +118,20 @@ static int udf_adinicb_write_end(struct file *file, return simple_write_end(file, mapping, pos, len, copied, page, fsdata); } +static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + /* Fallback to buffered I/O. */ + return 0; +} + const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .write_begin = udf_adinicb_write_begin, .write_end = udf_adinicb_write_end, + .direct_IO = udf_adinicb_direct_IO, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1a0588e77a2f..41d58309d6a8 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode) } } +static void udf_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + struct udf_inode_info *iinfo = UDF_I(inode); + loff_t isize = inode->i_size; + + if (to > isize) { + truncate_pagecache(inode, to, isize); + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + down_write(&iinfo->i_data_sem); + udf_truncate_extents(inode); + up_write(&iinfo->i_data_sem); + } + } +} + static int udf_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, udf_get_block, wbc); @@ -124,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, int ret; ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); - if (unlikely(ret)) { - struct inode *inode = mapping->host; - struct udf_inode_info *iinfo = UDF_I(inode); - loff_t isize = inode->i_size; - - if (pos + len > isize) { - truncate_pagecache(inode, pos + len, isize); - if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { - down_write(&iinfo->i_data_sem); - udf_truncate_extents(inode); - up_write(&iinfo->i_data_sem); - } - } - } + if (unlikely(ret)) + udf_write_failed(mapping, pos + len); + return ret; +} +static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + udf_get_block); + if (unlikely(ret < 0 && (rw & WRITE))) + udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); return ret; } @@ -152,8 +171,9 @@ const struct address_space_operations udf_aops = { .readpages = udf_readpages, .writepage = udf_writepage, .writepages = udf_writepages, - .write_begin = udf_write_begin, - .write_end = generic_write_end, + .write_begin = udf_write_begin, + .write_end = generic_write_end, + .direct_IO = udf_direct_IO, .bmap = udf_bmap, }; -- cgit v1.2.3 From 1f1ea6c2d9d8c0be9ec56454b05315273b5de8ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Aug 2012 11:44:43 -0700 Subject: NFSv4: Fix buffer overflow checking in __nfs4_get_acl_uncached Pass the checks made by decode_getacl back to __nfs4_get_acl_uncached so that it knows if the acl has been truncated. The current overflow checking is broken, resulting in Oopses on user-triggered nfs4_getfacl calls, and is opaque to the point where several attempts at fixing it have failed. This patch tries to clean up the code in addition to fixing the Oopses by ensuring that the overflow checks are performed in a single place (decode_getacl). If the overflow check failed, we will still be able to report the acl length, but at least we will no longer attempt to cache the acl or copy the truncated contents to user space. Reported-by: Sachin Prabhu Signed-off-by: Trond Myklebust Tested-by: Sachin Prabhu --- fs/nfs/nfs4proc.c | 31 ++++++++++++------------------- fs/nfs/nfs4xdr.c | 14 +++++--------- include/linux/nfs_xdr.h | 2 +- 3 files changed, 18 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6b94f2d52532..1e50326d00dd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3739,7 +3739,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size struct nfs4_cached_acl *acl; size_t buflen = sizeof(*acl) + acl_len; - if (pages && buflen <= PAGE_SIZE) { + if (buflen <= PAGE_SIZE) { acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; @@ -3784,7 +3784,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu }; unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret = -ENOMEM, i; - size_t acl_len = 0; /* As long as we're doing a round trip to the server anyway, * let's be prepared for a page of acl data. */ @@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu args.acl_len = npages * PAGE_SIZE; args.acl_pgbase = 0; - /* Let decode_getfacl know not to fail if the ACL data is larger than - * the page we send as a guess */ - if (buf == NULL) - res.acl_flags |= NFS4_ACL_LEN_REQUEST; - dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), @@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len; - if (acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, 0, acl_len); - else - nfs4_write_cached_acl(inode, pages, res.acl_data_offset, - acl_len); - if (buf) { + /* Handle the case where the passed-in buffer is too short */ + if (res.acl_flags & NFS4_ACL_TRUNC) { + /* Did the user only issue a request for the acl length? */ + if (buf == NULL) + goto out_ok; ret = -ERANGE; - if (acl_len > buflen) - goto out_free; - _copy_from_pages(buf, pages, res.acl_data_offset, - acl_len); + goto out_free; } - ret = acl_len; + nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); + if (buf) + _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len); +out_ok: + ret = res.acl_len; out_free: for (i = 0; i < npages; i++) if (pages[i]) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1bfbd67c556d..541e796e6db5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5072,18 +5072,14 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, * are stored with the acl data to handle the problem of * variable length bitmaps.*/ res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; - - /* We ignore &savep and don't do consistency checks on - * the attr length. Let userspace figure it out.... */ res->acl_len = attrlen; - if (attrlen > (xdr->nwords << 2)) { - if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { - /* getxattr interface called with a NULL buf */ - goto out; - } + + /* Check for receive buffer overflow */ + if (res->acl_len > (xdr->nwords << 2) || + res->acl_len + res->acl_data_offset > xdr->buf->page_len) { + res->acl_flags |= NFS4_ACL_TRUNC; dprintk("NFS: acl reply: attrlen %u > page_len %u\n", attrlen, xdr->nwords << 2); - return -EINVAL; } } else status = -EOPNOTSUPP; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac7c8ae254f2..be9cf3c7e79e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -652,7 +652,7 @@ struct nfs_getaclargs { }; /* getxattr ACL interface flags */ -#define NFS4_ACL_LEN_REQUEST 0x0001 /* zero length getxattr buffer */ +#define NFS4_ACL_TRUNC 0x0001 /* ACL was truncated */ struct nfs_getaclres { size_t acl_len; size_t acl_data_offset; -- cgit v1.2.3 From e2f2886a824ff0a56da1eaa13019fde86aa89fa6 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 29 Aug 2012 21:13:38 +0400 Subject: CIFS: Fix error handling in cifs_push_mandatory_locks Cc: Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9154192b0683..71e9ad9f5961 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -917,7 +917,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) if (!buf) { mutex_unlock(&cinode->lock_mutex); free_xid(xid); - return rc; + return -ENOMEM; } for (i = 0; i < 2; i++) { -- cgit v1.2.3 From b2ede58e98c87c7f0f44a926f974262f65c3402f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 4 Sep 2012 15:49:31 +0400 Subject: CIFS: Fix endianness conversion Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c5fbfac5d576..15dc8eea8273 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -96,7 +96,7 @@ * */ -#define SMB2_HEADER_STRUCTURE_SIZE __constant_le16_to_cpu(64) +#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64) struct smb2_hdr { __be32 smb2_buf_length; /* big endian on wire */ @@ -140,7 +140,7 @@ struct smb2_pdu { * */ -#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) +#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9) struct smb2_err_rsp { struct smb2_hdr hdr; -- cgit v1.2.3 From 01913b49cf1dc6409a07dd2a4cc6af2e77f3c410 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 6 Sep 2012 15:54:27 -0400 Subject: NFS: return error from decode_getfh in decode open If decode_getfh failed, nfs4_xdr_dec_open would return 0 since the last decode_* call must have succeeded. Cc: stable@vger.kernel.org Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 541e796e6db5..8dba6bd48557 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6225,7 +6225,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_open(xdr, res); if (status) goto out; - if (decode_getfh(xdr, &res->fh) != 0) + status = decode_getfh(xdr, &res->fh); + if (status) goto out; decode_getfattr(xdr, res->f_attr, res->server); out: -- cgit v1.2.3 From 7dc05881b64792e0ea41293e9595cc962a716225 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Apr 2012 14:01:31 -0700 Subject: userns: Convert debugfs to use kuid/kgid where appropriate. Acked-by: Greg Kroah-Hartman Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/debugfs/inode.c | 26 ++++++++++++++++++-------- init/Kconfig | 1 - 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4733eab34a23..36e2b667e822 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -128,8 +128,8 @@ static inline int debugfs_positive(struct dentry *dentry) } struct debugfs_mount_opts { - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; umode_t mode; }; @@ -156,6 +156,8 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) substring_t args[MAX_OPT_ARGS]; int option; int token; + kuid_t uid; + kgid_t gid; char *p; opts->mode = DEBUGFS_DEFAULT_MODE; @@ -169,12 +171,18 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) case Opt_uid: if (match_int(&args[0], &option)) return -EINVAL; - opts->uid = option; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + opts->uid = uid; break; case Opt_gid: if (match_octal(&args[0], &option)) return -EINVAL; - opts->gid = option; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + opts->gid = gid; break; case Opt_mode: if (match_octal(&args[0], &option)) @@ -226,10 +234,12 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root) struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; struct debugfs_mount_opts *opts = &fsi->mount_opts; - if (opts->uid != 0) - seq_printf(m, ",uid=%u", opts->uid); - if (opts->gid != 0) - seq_printf(m, ",gid=%u", opts->gid); + if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->uid)); + if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->gid)); if (opts->mode != DEBUGFS_DEFAULT_MODE) seq_printf(m, ",mode=%o", opts->mode); diff --git a/init/Kconfig b/init/Kconfig index fdabc5160cdf..071dbb4928ef 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -964,7 +964,6 @@ config UIDGID_CONVERTED depends on CODA_FS = n depends on CONFIGFS_FS = n depends on CRAMFS = n - depends on DEBUG_FS = n depends on ECRYPT_FS = n depends on EFS_FS = n depends on EXOFS_FS = n -- cgit v1.2.3 From 65f8c95e46a1827ae8bbc52a817ea308dd7d65ae Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jul 2012 14:26:15 -0700 Subject: pstore/ftrace: Convert to its own enable/disable debugfs knob With this patch we no longer reuse function tracer infrastructure, now we register our own tracer back-end via a debugfs knob. It's a bit more code, but that is the only downside. On the bright side we have: - Ability to make persistent_ram module removable (when needed, we can move ftrace_ops struct into a module). Note that persistent_ram is still not removable for other reasons, but with this patch it's just one thing less to worry about; - Pstore part is more isolated from the generic function tracer. We tried it already by registering our own tracer in available_tracers, but that way we're loosing ability to see the traces while we record them to pstore. This solution is somewhere in the middle: we only register "internal ftracer" back-end, but not the "front-end"; - When there is only pstore tracing enabled, the kernel will only write to the pstore buffer, omitting function tracer buffer (which, of course, still can be enabled via 'echo function > current_tracer'). Suggested-by: Steven Rostedt Signed-off-by: Anton Vorontsov --- Documentation/ramoops.txt | 4 +- fs/pstore/Kconfig | 1 + fs/pstore/ftrace.c | 96 +++++++++++++++++++++++++++++++++++++++++- fs/pstore/internal.h | 6 +++ fs/pstore/platform.c | 1 + include/linux/pstore.h | 8 ---- kernel/trace/trace_functions.c | 15 +------ 7 files changed, 105 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt index 197ad59ab9bf..69b3cac4749d 100644 --- a/Documentation/ramoops.txt +++ b/Documentation/ramoops.txt @@ -102,9 +102,7 @@ related hangs. The functions call chain log is stored in a "ftrace-ramoops" file. Here is an example of usage: # mount -t debugfs debugfs /sys/kernel/debug/ - # cd /sys/kernel/debug/tracing - # echo function > current_tracer - # echo 1 > options/func_pstore + # echo 1 > /sys/kernel/debug/pstore/record_ftrace # reboot -f [...] # mount -t pstore pstore /mnt/ diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index d39bb5cce883..ca71db69da07 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -23,6 +23,7 @@ config PSTORE_FTRACE bool "Persistent function tracer" depends on PSTORE depends on FUNCTION_TRACER + depends on DEBUG_FS help With this option kernel traces function calls into a persistent ram buffer that can be decoded and dumped after reboot through diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index a130d484b7d3..2d57e1ac0115 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -17,19 +17,113 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include "internal.h" -void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) +static void notrace pstore_ftrace_call(unsigned long ip, + unsigned long parent_ip) { + unsigned long flags; struct pstore_ftrace_record rec = {}; if (unlikely(oops_in_progress)) return; + local_irq_save(flags); + rec.ip = ip; rec.parent_ip = parent_ip; pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, sizeof(rec), psinfo); + + local_irq_restore(flags); +} + +static struct ftrace_ops pstore_ftrace_ops __read_mostly = { + .func = pstore_ftrace_call, +}; + +static DEFINE_MUTEX(pstore_ftrace_lock); +static bool pstore_ftrace_enabled; + +static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + u8 on; + ssize_t ret; + + ret = kstrtou8_from_user(buf, count, 2, &on); + if (ret) + return ret; + + mutex_lock(&pstore_ftrace_lock); + + if (!on ^ pstore_ftrace_enabled) + goto out; + + if (on) + ret = register_ftrace_function(&pstore_ftrace_ops); + else + ret = unregister_ftrace_function(&pstore_ftrace_ops); + if (ret) { + pr_err("%s: unable to %sregister ftrace ops: %zd\n", + __func__, on ? "" : "un", ret); + goto err; + } + + pstore_ftrace_enabled = on; +out: + ret = count; +err: + mutex_unlock(&pstore_ftrace_lock); + + return ret; +} + +static ssize_t pstore_ftrace_knob_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + char val[] = { '0' + pstore_ftrace_enabled, '\n' }; + + return simple_read_from_buffer(buf, count, ppos, val, sizeof(val)); +} + +static const struct file_operations pstore_knob_fops = { + .open = simple_open, + .read = pstore_ftrace_knob_read, + .write = pstore_ftrace_knob_write, +}; + +void pstore_register_ftrace(void) +{ + struct dentry *dir; + struct dentry *file; + + if (!psinfo->write_buf) + return; + + dir = debugfs_create_dir("pstore", NULL); + if (!dir) { + pr_err("%s: unable to create pstore directory\n", __func__); + return; + } + + file = debugfs_create_file("record_ftrace", 0600, dir, NULL, + &pstore_knob_fops); + if (!file) { + pr_err("%s: unable to create record_ftrace file\n", __func__); + goto err_file; + } + + return; +err_file: + debugfs_remove(dir); } diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 0d0d3b7d5f12..4847f588b7d5 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -39,6 +39,12 @@ pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec) #endif } +#ifdef CONFIG_PSTORE_FTRACE +extern void pstore_register_ftrace(void); +#else +static inline void pstore_register_ftrace(void) {} +#endif + extern struct pstore_info *psinfo; extern void pstore_set_kmsg_bytes(int); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 29996e8793a7..6c23eab7f76c 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -236,6 +236,7 @@ int pstore_register(struct pstore_info *psi) kmsg_dump_register(&pstore_dumper); pstore_register_console(); + pstore_register_ftrace(); if (pstore_update_ms >= 0) { pstore_timer.expires = jiffies + diff --git a/include/linux/pstore.h b/include/linux/pstore.h index c892587d9b81..ee3034a40884 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -64,14 +64,6 @@ struct pstore_info { void *data; }; - -#ifdef CONFIG_PSTORE_FTRACE -extern void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip); -#else -static inline void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) -{ } -#endif - #ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); #else diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index a426f410c060..0ad83e3929d1 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include "trace.h" @@ -75,10 +74,9 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) preempt_enable_notrace(); } -/* Our two options */ +/* Our option */ enum { TRACE_FUNC_OPT_STACK = 0x1, - TRACE_FUNC_OPT_PSTORE = 0x2, }; static struct tracer_flags func_flags; @@ -106,12 +104,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - /* - * So far tracing doesn't support multiple buffers, so - * we make an explicit call for now. - */ - if (unlikely(func_flags.val & TRACE_FUNC_OPT_PSTORE)) - pstore_ftrace_call(ip, parent_ip); pc = preempt_count(); trace_function(tr, ip, parent_ip, flags, pc); } @@ -176,9 +168,6 @@ static struct ftrace_ops trace_stack_ops __read_mostly = static struct tracer_opt func_opts[] = { #ifdef CONFIG_STACKTRACE { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, -#endif -#ifdef CONFIG_PSTORE_FTRACE - { TRACER_OPT(func_pstore, TRACE_FUNC_OPT_PSTORE) }, #endif { } /* Always set a last empty entry */ }; @@ -231,8 +220,6 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) register_ftrace_function(&trace_ops); } - break; - case TRACE_FUNC_OPT_PSTORE: break; default: return -EINVAL; -- cgit v1.2.3 From 2ab51f3721f7abdf92d89cb79d3d6c0062ddc14b Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 23 Mar 2011 17:23:06 -0400 Subject: vfs: extend vfs_removexattr locking This patch takes the i_mutex lock before security_inode_removexattr(), instead of after, in preparation of calling ima_inode_removexattr(). Signed-off-by: Mimi Zohar Signed-off-by: Dmitry Kasatkin --- fs/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index 4d45b7189e7e..107f457143c3 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -295,11 +295,13 @@ vfs_removexattr(struct dentry *dentry, const char *name) if (error) return error; + mutex_lock(&inode->i_mutex); error = security_inode_removexattr(dentry, name); - if (error) + if (error) { + mutex_unlock(&inode->i_mutex); return error; + } - mutex_lock(&inode->i_mutex); error = inode->i_op->removexattr(dentry, name); mutex_unlock(&inode->i_mutex); -- cgit v1.2.3 From 4199d35cbc90c15db447d115bd96ffa5f1d60d3a Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 16 Mar 2011 22:48:43 -0400 Subject: vfs: move ima_file_free before releasing the file ima_file_free(), called on __fput(), currently flags files that have changed, so that the file is re-measured. For appraising a files's integrity, the file's hash must be re-calculated and stored in the 'security.ima' xattr to reflect any changes. This patch moves the ima_file_free() call to before releasing the file in preparation of ima-appraisal measuring the file and updating the 'security.ima' xattr. Signed-off-by: Mimi Zohar Acked-by: Serge Hallyn Acked-by: Dmitry Kasatkin --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index 701985e4ccda..a41f23f90b17 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -243,10 +243,10 @@ static void __fput(struct file *file) if (file->f_op && file->f_op->fasync) file->f_op->fasync(-1, file, 0); } + ima_file_free(file); if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); - ima_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(file->f_mode & FMODE_PATH))) { cdev_put(inode->i_cdev); -- cgit v1.2.3 From 9957a5043e7b0b7361cdf48eea22b2900293e63a Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 9 Mar 2011 22:57:53 -0500 Subject: ima: add inode_post_setattr call Changing an inode's metadata may result in our not needing to appraise the file. In such cases, we must remove 'security.ima'. Changelog v1: - use ima_inode_post_setattr() stub function, if IMA_APPRAISE not configured Signed-off-by: Mimi Zohar Acked-by: Serge Hallyn Acked-by: Dmitry Kasatkin --- fs/attr.c | 2 ++ include/linux/ima.h | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/attr.c b/fs/attr.c index 29e38a1f7f77..cce7df53b694 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -14,6 +14,7 @@ #include #include #include +#include /** * inode_change_ok - check if attribute changes to an inode are allowed @@ -247,6 +248,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr) if (!error) { fsnotify_change(dentry, ia_valid); + ima_inode_post_setattr(dentry); evm_inode_post_setattr(dentry, ia_valid); } diff --git a/include/linux/ima.h b/include/linux/ima.h index 6ac8e50c6cf5..e2bfbb1e9af6 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -39,5 +39,15 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) { return 0; } + #endif /* CONFIG_IMA_H */ + +#ifdef CONFIG_IMA_APPRAISE +extern void ima_inode_post_setattr(struct dentry *dentry); +#else +static inline void ima_inode_post_setattr(struct dentry *dentry) +{ + return; +} +#endif /* CONFIG_IMA_APPRAISE_H */ #endif /* _LINUX_IMA_H */ -- cgit v1.2.3 From 2b75bc9121e54e22537207b47b71373bcb0be41c Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 9 Sep 2012 16:16:58 +0200 Subject: dlm: check the maximum size of a request from user device_write only checks whether the request size is big enough, but it doesn't check if the size is too big. At that point, it also tries to allocate as much memory as the user has requested even if it's too much. This can lead to OOM killer kicking in, or memory corruption if (count + 1) overflows. Signed-off-by: Sasha Levin Signed-off-by: David Teigland --- fs/dlm/user.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/dlm/user.c b/fs/dlm/user.c index eb4ed9ba3098..7ff49852b0cb 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -503,6 +503,13 @@ static ssize_t device_write(struct file *file, const char __user *buf, #endif return -EINVAL; +#ifdef CONFIG_COMPAT + if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) +#else + if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) +#endif + return -EINVAL; + kbuf = kzalloc(count + 1, GFP_NOFS); if (!kbuf) return -ENOMEM; -- cgit v1.2.3 From 15e473046cb6e5d18a4d0057e61d76315230382b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Sep 2012 20:12:54 +0000 Subject: netlink: Rename pid to portid to avoid confusion It is a frequent mistake to confuse the netlink port identifier with a process identifier. Try to reduce this confusion by renaming fields that hold port identifiers portid instead of pid. I have carefully avoided changing the structures exported to userspace to avoid changing the userspace API. I have successfully built an allyesconfig kernel with this change. Signed-off-by: "Eric W. Biederman" Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- crypto/crypto_user.c | 4 +- drivers/net/team/team.c | 38 +++--- drivers/net/wireless/mac80211_hwsim.c | 46 ++++---- drivers/scsi/scsi_transport_iscsi.c | 4 +- drivers/staging/gdm72xx/netlink_k.c | 2 +- fs/dlm/netlink.c | 8 +- include/linux/netlink.h | 14 +-- include/net/cfg80211.h | 2 +- include/net/genetlink.h | 34 +++--- include/net/netfilter/nf_conntrack_ecache.h | 32 +++--- include/net/netlink.h | 26 ++--- include/net/nfc/nfc.h | 2 +- include/net/xfrm.h | 6 +- kernel/audit.c | 20 ++-- kernel/taskstats.c | 4 +- net/bridge/br_fdb.c | 6 +- net/bridge/br_netlink.c | 2 +- net/can/gw.c | 2 +- net/core/fib_rules.c | 6 +- net/core/neighbour.c | 8 +- net/core/rtnetlink.c | 12 +- net/dcb/dcbnl.c | 18 +-- net/decnet/dn_dev.c | 6 +- net/decnet/dn_route.c | 10 +- net/decnet/dn_table.c | 12 +- net/ieee802154/nl-mac.c | 6 +- net/ieee802154/nl-phy.c | 6 +- net/ipv4/devinet.c | 28 ++--- net/ipv4/fib_frontend.c | 10 +- net/ipv4/fib_semantics.c | 8 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/inet_diag.c | 32 +++--- net/ipv4/ipmr.c | 10 +- net/ipv4/route.c | 8 +- net/ipv4/tcp_metrics.c | 2 +- net/ipv4/udp_diag.c | 6 +- net/ipv6/addrconf.c | 32 +++--- net/ipv6/addrlabel.c | 10 +- net/ipv6/ip6mr.c | 10 +- net/ipv6/route.c | 20 ++-- net/irda/irnetlink.c | 2 +- net/key/af_key.c | 32 +++--- net/l2tp/l2tp_netlink.c | 24 ++-- net/netfilter/ipset/ip_set_core.c | 24 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 6 +- net/netfilter/nf_conntrack_ecache.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 78 ++++++------- net/netfilter/nfnetlink_acct.c | 12 +- net/netfilter/nfnetlink_cthelper.c | 12 +- net/netfilter/nfnetlink_cttimeout.c | 12 +- net/netfilter/nfnetlink_log.c | 18 +-- net/netfilter/nfnetlink_queue_core.c | 28 ++--- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_mgmt.c | 4 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlink/af_netlink.c | 172 ++++++++++++++-------------- net/netlink/genetlink.c | 42 +++---- net/nfc/netlink.c | 26 ++--- net/openvswitch/actions.c | 4 +- net/openvswitch/datapath.c | 84 +++++++------- net/openvswitch/datapath.h | 2 +- net/openvswitch/vport.c | 2 +- net/openvswitch/vport.h | 6 +- net/packet/diag.c | 6 +- net/phonet/pn_netlink.c | 14 +-- net/sched/act_api.c | 52 ++++----- net/sched/cls_api.c | 14 +-- net/sched/sch_api.c | 44 +++---- net/tipc/netlink.c | 2 +- net/unix/diag.c | 14 +-- net/wireless/core.h | 2 +- net/wireless/mlme.c | 16 +-- net/wireless/nl80211.c | 110 +++++++++--------- net/xfrm/xfrm_state.c | 10 +- net/xfrm/xfrm_user.c | 62 +++++----- 75 files changed, 728 insertions(+), 728 deletions(-) (limited to 'fs') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 165914e63ef2..6bba414d0c61 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -166,7 +166,7 @@ static int crypto_report_alg(struct crypto_alg *alg, struct crypto_user_alg *ualg; int err = 0; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, info->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq, CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags); if (!nlh) { err = -EMSGSIZE; @@ -216,7 +216,7 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, if (err) return err; - return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).pid); + return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid); } static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b4f67b55ef79..266af7b38ebc 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1886,7 +1886,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &team_nl_family, 0, TEAM_CMD_NOOP); if (IS_ERR(hdr)) { err = PTR_ERR(hdr); @@ -1895,7 +1895,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_msg_put: nlmsg_free(msg); @@ -1952,7 +1952,7 @@ static int team_nl_send_generic(struct genl_info *info, struct team *team, if (err < 0) goto err_fill; - err = genlmsg_unicast(genl_info_net(info), skb, info->snd_pid); + err = genlmsg_unicast(genl_info_net(info), skb, info->snd_portid); return err; err_fill: @@ -1961,11 +1961,11 @@ err_fill: } typedef int team_nl_send_func_t(struct sk_buff *skb, - struct team *team, u32 pid); + struct team *team, u32 portid); -static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 pid) +static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 portid) { - return genlmsg_unicast(dev_net(team->dev), skb, pid); + return genlmsg_unicast(dev_net(team->dev), skb, portid); } static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team, @@ -2050,13 +2050,13 @@ nest_cancel: } static int __send_and_alloc_skb(struct sk_buff **pskb, - struct team *team, u32 pid, + struct team *team, u32 portid, team_nl_send_func_t *send_func) { int err; if (*pskb) { - err = send_func(*pskb, team, pid); + err = send_func(*pskb, team, portid); if (err) return err; } @@ -2066,7 +2066,7 @@ static int __send_and_alloc_skb(struct sk_buff **pskb, return 0; } -static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq, +static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq, int flags, team_nl_send_func_t *send_func, struct list_head *sel_opt_inst_list) { @@ -2083,11 +2083,11 @@ static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq, struct team_option_inst, tmp_list); start_again: - err = __send_and_alloc_skb(&skb, team, pid, send_func); + err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) return err; - hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI, + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2120,15 +2120,15 @@ start_again: goto start_again; send_done: - nlh = nlmsg_put(skb, pid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); if (!nlh) { - err = __send_and_alloc_skb(&skb, team, pid, send_func); + err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) goto errout; goto send_done; } - return send_func(skb, team, pid); + return send_func(skb, team, portid); nla_put_failure: err = -EMSGSIZE; @@ -2151,7 +2151,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) list_for_each_entry(opt_inst, &team->option_inst_list, list) list_add_tail(&opt_inst->tmp_list, &sel_opt_inst_list); - err = team_nl_send_options_get(team, info->snd_pid, info->snd_seq, + err = team_nl_send_options_get(team, info->snd_portid, info->snd_seq, NLM_F_ACK, team_nl_send_unicast, &sel_opt_inst_list); @@ -2305,7 +2305,7 @@ team_put: } static int team_nl_fill_port_list_get(struct sk_buff *skb, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, struct team *team, bool fillall) { @@ -2313,7 +2313,7 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb, void *hdr; struct team_port *port; - hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags, TEAM_CMD_PORT_LIST_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2362,7 +2362,7 @@ static int team_nl_fill_port_list_get_all(struct sk_buff *skb, struct genl_info *info, int flags, struct team *team) { - return team_nl_fill_port_list_get(skb, info->snd_pid, + return team_nl_fill_port_list_get(skb, info->snd_portid, info->snd_seq, NLM_F_ACK, team, true); } @@ -2415,7 +2415,7 @@ static struct genl_multicast_group team_change_event_mcgrp = { }; static int team_nl_send_multicast(struct sk_buff *skb, - struct team *team, u32 pid) + struct team *team, u32 portid) { return genlmsg_multicast_netns(dev_net(team->dev), skb, 0, team_change_event_mcgrp.id, GFP_KERNEL); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 72b0456e41bf..9d45b3bb974c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -38,7 +38,7 @@ MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Software simulator of 802.11 radio(s) for mac80211"); MODULE_LICENSE("GPL"); -static u32 wmediumd_pid; +static u32 wmediumd_portid; static int radios = 2; module_param(radios, int, 0444); @@ -545,7 +545,7 @@ static bool mac80211_hwsim_addr_match(struct mac80211_hwsim_data *data, static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, struct sk_buff *my_skb, - int dst_pid) + int dst_portid) { struct sk_buff *skb; struct mac80211_hwsim_data *data = hw->priv; @@ -619,7 +619,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; genlmsg_end(skb, msg_head); - genlmsg_unicast(&init_net, skb, dst_pid); + genlmsg_unicast(&init_net, skb, dst_portid); /* Enqueue the packet */ skb_queue_tail(&data->pending, my_skb); @@ -715,7 +715,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, { bool ack; struct ieee80211_tx_info *txi; - u32 _pid; + u32 _portid; mac80211_hwsim_monitor_rx(hw, skb); @@ -726,10 +726,10 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, } /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); /* NO wmediumd detected, perfect medium simulation */ ack = mac80211_hwsim_tx_frame_no_nl(hw, skb); @@ -814,7 +814,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, struct ieee80211_hw *hw = arg; struct sk_buff *skb; struct ieee80211_tx_info *info; - u32 _pid; + u32 _portid; hwsim_check_magic(vif); @@ -831,10 +831,10 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, mac80211_hwsim_monitor_rx(hw, skb); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); mac80211_hwsim_tx_frame_no_nl(hw, skb); dev_kfree_skb(skb); @@ -1315,7 +1315,7 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_pspoll *pspoll; - u32 _pid; + u32 _portid; if (!vp->assoc) return; @@ -1336,10 +1336,10 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) memcpy(pspoll->ta, mac, ETH_ALEN); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid); if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb)) printk(KERN_DEBUG "%s: PS-poll frame not ack'ed\n", __func__); @@ -1353,7 +1353,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_hdr *hdr; - u32 _pid; + u32 _portid; if (!vp->assoc) return; @@ -1375,10 +1375,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, memcpy(hdr->addr3, vp->bssid, ETH_ALEN); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid); if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb)) printk(KERN_DEBUG "%s: nullfunc frame not ack'ed\n", __func__); @@ -1632,10 +1632,10 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, if (info == NULL) goto out; - wmediumd_pid = info->snd_pid; + wmediumd_portid = info->snd_portid; printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, " - "switching to wmediumd mode with pid %d\n", info->snd_pid); + "switching to wmediumd mode with pid %d\n", info->snd_portid); return 0; out: @@ -1672,10 +1672,10 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, if (state != NETLINK_URELEASE) return NOTIFY_DONE; - if (notify->pid == wmediumd_pid) { + if (notify->portid == wmediumd_portid) { printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink" " socket, switching to perfect channel medium\n"); - wmediumd_pid = 0; + wmediumd_portid = 0; } return NOTIFY_DONE; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 519bd5303f3f..31969f2e13ce 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2119,7 +2119,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) switch (nlh->nlmsg_type) { case ISCSI_UEVENT_CREATE_SESSION: err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, ev->u.c_session.initial_cmdsn, ev->u.c_session.cmds_max, ev->u.c_session.queue_depth); @@ -2132,7 +2132,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) } err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, ev->u.c_bound_session.initial_cmdsn, ev->u.c_bound_session.cmds_max, ev->u.c_bound_session.queue_depth); diff --git a/drivers/staging/gdm72xx/netlink_k.c b/drivers/staging/gdm72xx/netlink_k.c index 2109cab0a14c..20d0aec52e72 100644 --- a/drivers/staging/gdm72xx/netlink_k.c +++ b/drivers/staging/gdm72xx/netlink_k.c @@ -135,7 +135,7 @@ int netlink_send(struct sock *sock, int group, u16 type, void *msg, int len) } memcpy(nlmsg_data(nlh), msg, len); - NETLINK_CB(skb).pid = 0; + NETLINK_CB(skb).portid = 0; NETLINK_CB(skb).dst_group = 0; ret = netlink_broadcast(sock, skb, 0, group+1, GFP_ATOMIC); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index ef17e0169da1..60a327863b11 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -14,7 +14,7 @@ #include "dlm_internal.h" static uint32_t dlm_nl_seqnum; -static uint32_t listener_nlpid; +static uint32_t listener_nlportid; static struct genl_family family = { .id = GENL_ID_GENERATE, @@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb) return rv; } - return genlmsg_unicast(&init_net, skb, listener_nlpid); + return genlmsg_unicast(&init_net, skb, listener_nlportid); } static int user_cmd(struct sk_buff *skb, struct genl_info *info) { - listener_nlpid = info->snd_pid; - printk("user_cmd nlpid %u\n", listener_nlpid); + listener_nlportid = info->snd_portid; + printk("user_cmd nlpid %u\n", listener_nlportid); return 0; } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cd17dda5a987..73ade5fbc856 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -165,7 +165,7 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ - __u32 pid; + __u32 portid; __u32 dst_group; struct sock *ssk; }; @@ -205,14 +205,14 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group) extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); -extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, +extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); +extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, - __u32 pid, __u32 group, gfp_t allocation, + __u32 portid, __u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data); -extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); +extern int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); @@ -253,12 +253,12 @@ struct netlink_callback { struct netlink_notify { struct net *net; - int pid; + int portid; int protocol; }; struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags); +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { int (*dump)(struct sk_buff *skb, struct netlink_callback *); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ba2e6160fad1..7b3b0568d289 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2458,7 +2458,7 @@ struct wireless_dev { int beacon_interval; - u32 ap_unexpected_nlpid; + u32 ap_unexpected_nlportid; #ifdef CONFIG_CFG80211_WEXT /* wext data */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 48905cd3884c..bdfbe68c1c3b 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -65,7 +65,7 @@ struct genl_family { /** * struct genl_info - receiving information * @snd_seq: sending sequence number - * @snd_pid: netlink pid of sender + * @snd_portid: netlink portid of sender * @nlhdr: netlink message header * @genlhdr: generic netlink message header * @userhdr: user specific header @@ -75,7 +75,7 @@ struct genl_family { */ struct genl_info { u32 snd_seq; - u32 snd_pid; + u32 snd_portid; struct nlmsghdr * nlhdr; struct genlmsghdr * genlhdr; void * userhdr; @@ -130,10 +130,10 @@ extern int genl_register_mc_group(struct genl_family *family, struct genl_multicast_group *grp); extern void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp); -extern void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, +extern void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags); -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); /** @@ -183,7 +183,7 @@ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_family *family, int flags, u8 cmd) { - return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, flags, cmd); } @@ -212,49 +212,49 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) * genlmsg_multicast_netns - multicast a netlink message to a specific netns * @net: the net namespace * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int genlmsg_multicast_netns(struct net *net, struct sk_buff *skb, - u32 pid, unsigned int group, gfp_t flags) + u32 portid, unsigned int group, gfp_t flags) { - return nlmsg_multicast(net->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); } /** * genlmsg_multicast - multicast a netlink message to the default netns * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ -static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, +static inline int genlmsg_multicast(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_multicast_netns(&init_net, skb, pid, group, flags); + return genlmsg_multicast_netns(&init_net, skb, portid, group, flags); } /** * genlmsg_multicast_allns - multicast a netlink message to all net namespaces * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags * * This function must hold the RTNL or rcu_read_lock(). */ -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); /** * genlmsg_unicast - unicast a netlink message * @skb: netlink message as socket buffer - * @pid: netlink pid of the destination socket + * @portid: netlink portid of the destination socket */ -static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid) +static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 portid) { - return nlmsg_unicast(net->genl_sock, skb, pid); + return nlmsg_unicast(net->genl_sock, skb, portid); } /** @@ -264,7 +264,7 @@ static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid) */ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) { - return genlmsg_unicast(genl_info_net(info), skb, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), skb, info->snd_portid); } /** diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 4a045cda9c60..5654d292efd4 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -17,7 +17,7 @@ struct nf_conntrack_ecache { unsigned long missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ - u32 pid; /* netlink pid of destroyer */ + u32 portid; /* netlink portid of destroyer */ struct timer_list timeout; }; @@ -60,7 +60,7 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) /* This structure is passed to event handler */ struct nf_ct_event { struct nf_conn *ct; - u32 pid; + u32 portid; int report; }; @@ -92,7 +92,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { int ret = 0; @@ -112,11 +112,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { struct nf_ct_event item = { .ct = ct, - .pid = e->pid ? e->pid : pid, + .portid = e->portid ? e->portid : portid, .report = report }; /* This is a resent of a destroy event? If so, skip missed */ - unsigned long missed = e->pid ? 0 : e->missed; + unsigned long missed = e->portid ? 0 : e->missed; if (!((eventmask | missed) & e->ctmask)) goto out_unlock; @@ -126,11 +126,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask, spin_lock_bh(&ct->lock); if (ret < 0) { /* This is a destroy event that has been - * triggered by a process, we store the PID + * triggered by a process, we store the PORTID * to include it in the retransmission. */ if (eventmask & (1 << IPCT_DESTROY) && - e->pid == 0 && pid != 0) - e->pid = pid; + e->portid == 0 && portid != 0) + e->portid = portid; else e->missed |= eventmask; } else @@ -145,9 +145,9 @@ out_unlock: static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, - u32 pid, int report) + u32 portid, int report) { - return nf_conntrack_eventmask_report(1 << event, ct, pid, report); + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); } static inline int @@ -158,7 +158,7 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) struct nf_exp_event { struct nf_conntrack_expect *exp; - u32 pid; + u32 portid; int report; }; @@ -172,7 +172,7 @@ extern void nf_ct_expect_unregister_notifier(struct net *net, struct nf_exp_even static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, - u32 pid, + u32 portid, int report) { struct net *net = nf_ct_exp_net(exp); @@ -191,7 +191,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, - .pid = pid, + .portid = portid, .report = report }; notify->fcn(1 << event, &item); @@ -216,20 +216,20 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { return 0; } static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { return 0; } static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, struct nf_conntrack_expect *exp, - u32 pid, + u32 portid, int report) {} static inline int nf_conntrack_ecache_init(struct net *net) diff --git a/include/net/netlink.h b/include/net/netlink.h index 09175d5d1fbf..9690b0f6698a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -217,19 +217,19 @@ struct nla_policy { /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request - * @pid: Netlink PID of requesting application + * @portid: Netlink PORTID of requesting application */ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; - u32 pid; + u32 portid; }; extern int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *)); extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, int report, + u32 portid, unsigned int group, int report, gfp_t flags); extern int nla_validate(const struct nlattr *head, @@ -444,7 +444,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) /** * nlmsg_put - Add a new netlink message to an skb * @skb: socket buffer to store message in - * @pid: netlink process id + * @portid: netlink process id * @seq: sequence number of message * @type: message type * @payload: length of message payload @@ -453,13 +453,13 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) * Returns NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ -static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int payload, int flags) { if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload))) return NULL; - return __nlmsg_put(skb, pid, seq, type, payload, flags); + return __nlmsg_put(skb, portid, seq, type, payload, flags); } /** @@ -478,7 +478,7 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, int type, int payload, int flags) { - return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + return nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, type, payload, flags); } @@ -563,18 +563,18 @@ static inline void nlmsg_free(struct sk_buff *skb) * nlmsg_multicast - multicast a netlink message * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, gfp_t flags) + u32 portid, unsigned int group, gfp_t flags) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, pid, group, flags); + err = netlink_broadcast(sk, skb, portid, group, flags); if (err > 0) err = 0; @@ -585,13 +585,13 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer - * @pid: netlink pid of the destination socket + * @portid: netlink portid of the destination socket */ -static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid) +static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 portid) { int err; - err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + err = netlink_unicast(sk, skb, portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 6431f5e39022..6735909f826d 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -89,7 +89,7 @@ struct nfc_target { }; struct nfc_genl_data { - u32 poll_req_pid; + u32 poll_req_portid; struct mutex genl_data_mutex; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36ad56ba648b..ee8613f01860 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -263,7 +263,7 @@ struct km_event { } data; u32 seq; - u32 pid; + u32 portid; u32 event; struct net *net; }; @@ -310,7 +310,7 @@ extern void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; extern int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -extern void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +extern void km_state_expired(struct xfrm_state *x, int hard, u32 portid); extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { @@ -1554,7 +1554,7 @@ extern int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, #endif extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); -extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); +extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid); extern int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); diff --git a/kernel/audit.c b/kernel/audit.c index a24aafa850ae..e0cf64a0ae2d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -87,11 +87,11 @@ static int audit_failure = AUDIT_FAIL_PRINTK; /* * If audit records are to be written to the netlink socket, audit_pid - * contains the pid of the auditd process and audit_nlk_pid contains - * the pid to use to send netlink messages to that process. + * contains the pid of the auditd process and audit_nlk_portid contains + * the portid to use to send netlink messages to that process. */ int audit_pid; -static int audit_nlk_pid; +static int audit_nlk_portid; /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in @@ -401,7 +401,7 @@ static void kauditd_send_skb(struct sk_buff *skb) int err; /* take a reference in case we can't send it and we want to hold it */ skb_get(skb); - err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); + err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); if (err < 0) { BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); @@ -692,7 +692,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) status_set.backlog_limit = audit_backlog_limit; status_set.lost = atomic_read(&audit_lost); status_set.backlog = skb_queue_len(&audit_skb_queue); - audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, &status_set, sizeof(status_set)); break; case AUDIT_SET: @@ -720,7 +720,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sessionid, sid, 1); audit_pid = new_pid; - audit_nlk_pid = NETLINK_CB(skb).pid; + audit_nlk_portid = NETLINK_CB(skb).portid; } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(status_get->rate_limit, @@ -782,7 +782,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST: - err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -801,7 +801,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST_RULES: - err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -872,7 +872,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memcpy(sig_data->ctx, ctx, len); security_release_secctx(ctx, len); } - audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; @@ -891,7 +891,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) rcu_read_unlock(); if (!err) - audit_send_reply(NETLINK_CB(skb).pid, seq, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d0a32796550f..123793cd06f9 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -467,7 +467,7 @@ static int cmd_attr_register_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, REGISTER); + rc = add_del_listener(info->snd_portid, mask, REGISTER); out: free_cpumask_var(mask); return rc; @@ -483,7 +483,7 @@ static int cmd_attr_deregister_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, DEREGISTER); + rc = add_del_listener(info->snd_portid, mask, DEREGISTER); out: free_cpumask_var(mask); return rc; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9ce430b4657c..ddf93efc133c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -467,14 +467,14 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, - u32 pid, u32 seq, int type, unsigned int flags) + u32 portid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -555,7 +555,7 @@ int br_fdb_dump(struct sk_buff *skb, goto skip; if (fdb_fill_info(skb, br, f, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) < 0) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe41260fbf38..093f527276a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -127,7 +127,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) goto skip; if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) break; diff --git a/net/can/gw.c b/net/can/gw.c index b54d5e695b03..127879c55fb6 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -549,7 +549,7 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).pid, + if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..58a4ba27dfe3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (unresolved) ops->unresolved_rules++; - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); if (ops->delete) ops->delete(rule); fib_rule_put(rule); @@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops) < 0) break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 117afaf51268..c160adb38e5a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) break; @@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (neightbl_fill_param_info(skb, tbl, p, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) @@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) <= 0) { @@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, tbl) <= 0) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 508c5df4a09c..92575370d9f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1081,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI, ext_filter_mask) <= 0) @@ -1899,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (nskb == NULL) return -ENOBUFS; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, + err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); return err; } @@ -2180,9 +2180,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, { struct netdev_hw_addr *ha; int err; - u32 pid, seq; + u32 portid, seq; - pid = NETLINK_CB(cb->skb).pid; + portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { @@ -2190,7 +2190,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, - pid, seq, 0, NTF_SELF); + portid, seq, 0, NTF_SELF); if (err < 0) return err; skip: diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 81f2bb62dea3..70989e672304 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1319,7 +1319,7 @@ nla_put_failure: } static int dcbnl_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid, int dcbx_ver) + u32 seq, u32 portid, int dcbx_ver) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -1330,7 +1330,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, if (!ops) return -EOPNOTSUPP; - skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh); + skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh); if (!skb) return -ENOBUFS; @@ -1353,16 +1353,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, } int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_IEEE); } EXPORT_SYMBOL(dcbnl_ieee_notify); int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_CEE); } EXPORT_SYMBOL(dcbnl_cee_notify); @@ -1656,7 +1656,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *netdev; struct dcbmsg *dcb = nlmsg_data(nlh); struct nlattr *tb[DCB_ATTR_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = -EINVAL; struct sk_buff *reply_skb; struct nlmsghdr *reply_nlh = NULL; @@ -1690,7 +1690,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; } - reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq, + reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq, nlh->nlmsg_flags, &reply_nlh); if (!reply_skb) { ret = -ENOBUFS; @@ -1705,7 +1705,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) nlmsg_end(reply_skb, reply_nlh); - ret = rtnl_unicast(reply_skb, &init_net, pid); + ret = rtnl_unicast(reply_skb, &init_net, portid); out: dev_put(netdev); return ret; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index f3924ab1e019..7b7e561412d3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -667,12 +667,12 @@ static inline size_t dn_ifaddr_nlmsg_size(void) } static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -753,7 +753,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (dn_idx < skip_naddr) continue; - if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) < 0) goto done; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index c855e8d0738f..b57419cc41a4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1543,7 +1543,7 @@ static int dn_route_input(struct sk_buff *skb) return dn_route_input_slow(skb); } -static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct dn_route *rt = (struct dn_route *)skb_dst(skb); @@ -1551,7 +1551,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct nlmsghdr *nlh; long expires; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; @@ -1685,7 +1685,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); + err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err == 0) goto out_free; @@ -1694,7 +1694,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void goto out_free; } - return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid); out_free: kfree_skb(skb); @@ -1737,7 +1737,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->dst)); - if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { skb_dst_drop(skb); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 16c986ab1228..f968c1b58f47 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -291,14 +291,14 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) return payload; } -static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -374,14 +374,14 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; - u32 pid = req ? req->pid : 0; + u32 portid = req ? req->portid : 0; int err = -ENOBUFS; skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); if (skb == NULL) goto errout; - err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); if (err < 0) { @@ -390,7 +390,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, kfree_skb(skb); goto errout; } - rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); return; errout: if (err < 0) @@ -411,7 +411,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, continue; if (f->fn_state & DN_S_ZOMBIE) continue; - if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->n, diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 1e9917124e75..96bb08abece2 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -246,7 +246,7 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_start_confirm); -static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) { void *hdr; @@ -534,7 +534,7 @@ static int ieee802154_list_iface(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, 0, dev); if (rc < 0) goto out_free; @@ -565,7 +565,7 @@ static int ieee802154_dump_iface(struct sk_buff *skb, if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) goto cont; - if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) break; cont: diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index d54be34cca94..22b1a7058fd3 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -35,7 +35,7 @@ #include "ieee802154.h" -static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct wpan_phy *phy) { void *hdr; @@ -105,7 +105,7 @@ static int ieee802154_list_phy(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, 0, phy); if (rc < 0) goto out_free; @@ -138,7 +138,7 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) return 0; rc = ieee802154_nl_fill_phy(data->skb, - NETLINK_CB(data->cb->skb).pid, + NETLINK_CB(data->cb->skb).portid, data->cb->nlh->nlmsg_seq, NLM_F_MULTI, phy); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f14eff506743..7b00556e184b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -311,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) } static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy, struct nlmsghdr *nlh, u32 pid) + int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -345,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -382,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -395,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { @@ -417,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -464,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; @@ -563,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; - __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -649,7 +649,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } /* @@ -1246,12 +1246,12 @@ static size_t inet_nlmsg_size(void) } static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -1313,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) { rcu_read_unlock(); @@ -1335,7 +1335,7 @@ done: } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct sk_buff *skb; u32 seq = nlh ? nlh->nlmsg_seq : 0; @@ -1347,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, if (skb == NULL) goto errout; - err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); return; errout: if (err < 0) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index dc1f10ed1872..68c93d1bb03a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_flags = rtm->rtm_flags; cfg->fc_nlflags = nlh->nlmsg_flags; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = net; @@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb) struct fib_result_nl *frn; struct nlmsghdr *nlh; struct fib_table *tb; - u32 pid; + u32 portid; net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); @@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb) nl_fib_lookup(frn, tb); - pid = NETLINK_CB(skb).pid; /* pid of sending process */ - NETLINK_CB(skb).pid = 0; /* from kernel */ + portid = NETLINK_CB(skb).portid; /* pid of sending process */ + NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); + netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); } static int __net_init nl_fib_lookup_init(struct net *net) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da80dc14cc76..3509065e409a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -391,7 +391,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, if (skb == NULL) goto errout; - err = fib_dump_info(skb, info->pid, seq, event, tb_id, + err = fib_dump_info(skb, info->portid, seq, event, tb_id, fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { @@ -400,7 +400,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, kfree_skb(skb); goto errout; } - rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, + rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); return; errout: @@ -989,14 +989,14 @@ failure: return ERR_PTR(err); } -int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8d766106b540..31d771ca9a70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1873,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8bc005b1435f..535584c00f91 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -70,7 +70,7 @@ static inline void inet_diag_unlock_handler( int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); @@ -84,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -201,23 +201,23 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, user_ns, pid, seq, nlmsg_flags, unlh); + skb, req, user_ns, portid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -260,14 +260,14 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *r, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, pid, seq, nlmsg_flags, + skb, r, portid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -316,14 +316,14 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -557,7 +557,7 @@ static int inet_csk_diag_dump(struct sock *sk, return inet_csk_diag_fill(sk, skb, r, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -592,14 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, } return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, struct user_namespace *user_ns, - u32 pid, u32 seq, + u32 portid, u32 seq, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -608,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; @@ -711,7 +711,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, err = inet_diag_fill_req(skb, sk, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { cb->args[3] = j + 1; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8aa7a4cf9139..1daa95c2a0ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -626,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { kfree_skb(skb); } @@ -870,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { ip_mr_forward(net, mrt, skb, c, 0); } @@ -2117,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc_cache *c) + u32 portid, u32 seq, struct mfc_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2176,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d39edf16d607..940f4f4cb201 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2136,7 +2136,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, EXPORT_SYMBOL_GPL(ip_route_output_flow); static int rt_fill_info(struct net *net, __be32 dst, __be32 src, - struct flowi4 *fl4, struct sk_buff *skb, u32 pid, + struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb_rtable(skb); @@ -2146,7 +2146,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2306,12 +2306,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void rt->rt_flags |= RTCF_NOTIFY; err = rt_fill_info(net, dst, src, &fl4, skb, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 988edb63ee73..4c752a6e0bcd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -803,7 +803,7 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tcp_metrics_nl_family, NLM_F_MULTI, TCP_METRICS_CMD_GET); if (!hdr) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d2f336ea82ca..505b30ad9182 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -26,7 +26,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return inet_sk_diag_fill(sk, NULL, skb, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -72,14 +72,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, err = inet_sk_diag_fill(sk, NULL, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 572cb660837b..1237d5d037d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3534,12 +3534,12 @@ static inline int inet6_ifaddr_msgsize(void) } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u32 preferred, valid; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3577,7 +3577,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3586,7 +3586,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3602,7 +3602,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3611,7 +3611,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3652,7 +3652,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); @@ -3668,7 +3668,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); @@ -3683,7 +3683,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); @@ -3805,7 +3805,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_ifa; } - err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDR, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ @@ -3813,7 +3813,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_ifa: in6_ifa_put(ifa); errout: @@ -4015,14 +4015,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) return -EMSGSIZE; @@ -4080,7 +4080,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (!idev) goto cont; if (inet6_fill_ifinfo(skb, idev, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) <= 0) goto out; @@ -4128,14 +4128,14 @@ static inline size_t inet6_prefix_nlmsg_size(void) } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, + struct prefix_info *pinfo, u32 portid, u32 seq, int event, unsigned int flags) { struct prefixmsg *pmsg; struct nlmsghdr *nlh; struct prefix_cacheinfo ci; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index eb6a63632d3c..0b0171570d17 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -470,10 +470,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, static int ip6addrlbl_fill(struct sk_buff *skb, struct ip6addrlbl_entry *p, u32 lseq, - u32 pid, u32 seq, int event, + u32 portid, u32 seq, int event, unsigned int flags) { - struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, + struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrlblmsg), flags); if (!nlh) return -EMSGSIZE; @@ -503,7 +503,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI)) <= 0) @@ -574,7 +574,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, } err = ip6addrlbl_fill(skb, p, lseq, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDRLABEL, 0); ip6addrlbl_put(p); @@ -585,7 +585,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4532973f0dd4..08ea3f0b6e55 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); } @@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else ip6_mr_forward(net, mrt, skb, c); } @@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc6_cache *c) + u32 portid, u32 seq, struct mfc6_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 339d921cf3b6..a81c6790a648 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1874,7 +1874,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, }; @@ -1924,7 +1924,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), }; @@ -2285,7 +2285,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2376,7 +2376,7 @@ static inline size_t rt6_nlmsg_size(void) static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, - int iif, int type, u32 pid, u32 seq, + int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; @@ -2392,7 +2392,7 @@ static int rt6_fill_node(struct net *net, } } - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -2537,7 +2537,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) return rt6_fill_node(arg->net, arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, + NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, prefix, 0, NLM_F_MULTI); } @@ -2617,14 +2617,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).pid, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; } @@ -2644,14 +2644,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0, 0); + event, info->portid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); return; errout: diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 6c7c4b92e4f8..c32971269280 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -100,7 +100,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) goto err_out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); if (hdr == NULL) { ret = -EMSGSIZE; diff --git a/net/key/af_key.c b/net/key/af_key.c index 334f93b8cfcb..2ca7d7f6861c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -54,7 +54,7 @@ struct pfkey_sock { struct { uint8_t msg_version; - uint32_t msg_pid; + uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { @@ -1447,7 +1447,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); @@ -1486,7 +1486,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); @@ -1523,7 +1523,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: @@ -1701,7 +1701,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -1736,7 +1736,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m c.data.proto = proto; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); @@ -1764,7 +1764,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -1798,7 +1798,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms return -EINVAL; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto); @@ -2157,7 +2157,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; - out_hdr->sadb_msg_pid = c->pid; + out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; @@ -2272,7 +2272,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); @@ -2351,7 +2351,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); @@ -2597,7 +2597,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (err) goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); @@ -2634,7 +2634,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -2663,7 +2663,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb return -EBUSY; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); @@ -2682,7 +2682,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -2711,7 +2711,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6ec3f67ad3f1 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -78,7 +78,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); if (IS_ERR(hdr)) { ret = PTR_ERR(hdr); @@ -87,7 +87,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -235,7 +235,7 @@ out: return ret; } -static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel) { void *hdr; @@ -248,7 +248,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, struct l2tp_stats stats; unsigned int start; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -359,12 +359,12 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel); if (ret < 0) goto err_out; - return genlmsg_unicast(net, msg, info->snd_pid); + return genlmsg_unicast(net, msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -384,7 +384,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (tunnel == NULL) goto out; - if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, tunnel) <= 0) goto out; @@ -604,7 +604,7 @@ out: return ret; } -static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_session *session) { void *hdr; @@ -616,7 +616,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -705,12 +705,12 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session); if (ret < 0) goto err_out; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -742,7 +742,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback continue; } - if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, session) <= 0) break; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9730882697aa..ad39ef406851 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -563,13 +563,13 @@ flag_exist(const struct nlmsghdr *nlh) } static struct nlmsghdr * -start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, +start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); if (nlh == NULL) return NULL; @@ -1045,7 +1045,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; - unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; @@ -1093,7 +1093,7 @@ dump_last: pr_debug("reference set\n"); __ip_set_get(index); } - nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, IPSET_CMD_LIST); if (!nlh) { @@ -1226,7 +1226,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, skb2 = nlmsg_new(payload, GFP_KERNEL); if (skb2 == NULL) return -ENOMEM; - rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, + rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = ret; @@ -1241,7 +1241,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1416,7 +1416,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; @@ -1428,7 +1428,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1476,7 +1476,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; @@ -1489,7 +1489,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1525,7 +1525,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; @@ -1533,7 +1533,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 767cc12da0fe..0f924bf19c2b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2939,7 +2939,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_SERVICE); if (!hdr) @@ -3128,7 +3128,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DEST); if (!hdr) @@ -3257,7 +3257,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, struct netlink_callback *cb) { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DAEMON); if (!hdr) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index e7be79e640de..de9781b6464f 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -61,7 +61,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) goto out_unlock; item.ct = ct; - item.pid = 0; + item.portid = 0; item.report = 0; ret = notify->fcn(events | missed, &item); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a205bd6ce294..59770039b825 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -418,16 +418,16 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -604,7 +604,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -680,7 +680,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, net, item->pid, group, item->report, + err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -757,7 +757,7 @@ restart: #endif rcu_read_lock(); res = - ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct); @@ -961,7 +961,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else { /* Flush the whole table */ nf_conntrack_flush_report(net, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); return 0; } @@ -985,7 +985,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (del_timer(&ct->timeout)) { if (nf_conntrack_event_report(IPCT_DESTROY, ct, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)) < 0) { nf_ct_delete_from_lists(ct); /* we failed to report the event, try later */ @@ -1069,14 +1069,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -1616,7 +1616,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK) | events, - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); } @@ -1638,7 +1638,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK), - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } } @@ -1648,15 +1648,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1708,7 +1708,7 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) st = per_cpu_ptr(net->ct.stat, cpu); if (ctnetlink_ct_stat_cpu_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; @@ -1734,16 +1734,16 @@ ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1776,14 +1776,14 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), sock_net(skb->sk)); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2073,15 +2073,15 @@ nla_put_failure: } static int -ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2132,7 +2132,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2147,7 +2147,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC); + nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: @@ -2190,7 +2190,7 @@ restart: cb->args[1] = 0; } if (ctnetlink_exp_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { @@ -2283,14 +2283,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2344,7 +2344,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* after list removal, usage count == 1 */ spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid, + nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2366,7 +2366,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2382,7 +2382,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2447,7 +2447,7 @@ static int ctnetlink_create_expect(struct net *net, u16 zone, const struct nlattr * const cda[], u_int8_t u3, - u32 pid, int report) + u32 portid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -2560,7 +2560,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (err < 0) goto err_out; } - err = nf_ct_expect_related_report(exp, pid, report); + err = nf_ct_expect_related_report(exp, portid, report); err_out: nf_ct_expect_put(exp); out: @@ -2603,7 +2603,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { err = ctnetlink_create_expect(net, zone, cda, u3, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); } return err; @@ -2618,15 +2618,15 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu, +ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2665,7 +2665,7 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; st = per_cpu_ptr(net->ct.stat, cpu); - if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index d7ec92879071..589d686f0b4c 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -91,16 +91,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, } static int -nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; event |= NFNL_SUBSYS_ACCT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -150,7 +150,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur) < 0) { @@ -195,7 +195,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur); @@ -203,7 +203,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 32a1ba3f3e27..3678073360a3 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -395,16 +395,16 @@ nla_put_failure: } static int -nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event |= NFNL_SUBSYS_CTHELPER << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -468,7 +468,7 @@ restart: cb->args[1] = 0; } if (nfnl_cthelper_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur) < 0) { @@ -538,7 +538,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur); @@ -547,7 +547,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index cdecbc8fe965..8847b4d8be06 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -155,16 +155,16 @@ err_proto_put: } static int -ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -222,7 +222,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { @@ -268,7 +268,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, break; } - ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid, + ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur); @@ -276,7 +276,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index be194b144297..8cb67c4dbd62 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -56,7 +56,7 @@ struct nfulnl_instance { struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ - int peer_pid; /* PID of the peer process */ + int peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ @@ -133,7 +133,7 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) +instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; int err; @@ -164,7 +164,7 @@ instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); inst->peer_user_ns = user_ns; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->group_num = group_num; inst->qthreshold = NFULNL_QTHRESH_DEFAULT; @@ -336,7 +336,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -703,7 +703,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -712,7 +712,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -774,7 +774,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_lookup_get(group_num); - if (inst && inst->peer_pid != NETLINK_CB(skb).pid) { + if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; } @@ -788,7 +788,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_create(group_num, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); @@ -947,7 +947,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", inst->group_num, - inst->peer_pid, inst->qlen, + inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, inst->flushtimeout, atomic_read(&inst->use)); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index c0496a55ad0c..b8d9995b76a8 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -44,7 +44,7 @@ struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; - int peer_pid; + int peer_portid; unsigned int queue_maxlen; unsigned int copy_range; unsigned int queue_dropped; @@ -92,7 +92,7 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int pid) +instance_create(u_int16_t queue_num, int portid) { struct nfqnl_instance *inst; unsigned int h; @@ -111,7 +111,7 @@ instance_create(u_int16_t queue_num, int pid) } inst->queue_num = queue_num; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; @@ -440,7 +440,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } spin_lock_bh(&queue->lock); - if (!queue->peer_pid) { + if (!queue->peer_portid) { err = -EINVAL; goto err_out_free_nskb; } @@ -459,7 +459,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -616,7 +616,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -625,7 +625,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((n->net == &init_net) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -650,7 +650,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) { struct nfqnl_instance *queue; @@ -658,7 +658,7 @@ static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) if (!queue) return ERR_PTR(-ENODEV); - if (queue->peer_pid != nlpid) + if (queue->peer_portid != nlportid) return ERR_PTR(-EPERM); return queue; @@ -698,7 +698,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -749,7 +749,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, queue = instance_lookup(queue_num); if (!queue) - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -832,7 +832,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); queue = instance_lookup(queue_num); - if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; } @@ -844,7 +844,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).pid); + queue = instance_create(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -1016,7 +1016,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", inst->queue_num, - inst->peer_pid, inst->queue_total, + inst->peer_portid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, inst->id_sequence, 1); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 6bf878335d94..c15042f987bd 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -627,7 +627,7 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_cipsov4_gnl_family, NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL); if (data == NULL) diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 4809e2e48b02..c5384ffc6146 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -448,7 +448,7 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) struct netlbl_domhsh_walk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_LISTALL); if (data == NULL) @@ -613,7 +613,7 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, int ret_val = -ENOMEM; void *data; - data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_PROTOCOLS); if (data == NULL) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..b7944413b404 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1096,7 +1096,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, char *secctx; u32 secctx_len; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d348e97e131..0f2e3ad69c47 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,8 +67,8 @@ struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; - u32 pid; - u32 dst_pid; + u32 portid; + u32 dst_portid; u32 dst_group; u32 flags; u32 subscriptions; @@ -104,7 +104,7 @@ static inline int netlink_is_kernel(struct sock *sk) return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_pid_hash { +struct nl_portid_hash { struct hlist_head *table; unsigned long rehash_time; @@ -118,7 +118,7 @@ struct nl_pid_hash { }; struct netlink_table { - struct nl_pid_hash hash; + struct nl_portid_hash hash; struct hlist_head mc_list; struct listeners __rcu *listeners; unsigned int flags; @@ -145,9 +145,9 @@ static inline u32 netlink_group_mask(u32 group) return group ? 1 << (group - 1) : 0; } -static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) +static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) { - return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; + return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } static void netlink_destroy_callback(struct netlink_callback *cb) @@ -239,17 +239,17 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) +static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_pid_hash *hash = &nl_table[protocol].hash; + struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; struct hlist_node *node; read_lock(&nl_table_lock); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(sk, node, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { + if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -260,7 +260,7 @@ found: return sk; } -static struct hlist_head *nl_pid_hash_zalloc(size_t size) +static struct hlist_head *nl_portid_hash_zalloc(size_t size) { if (size <= PAGE_SIZE) return kzalloc(size, GFP_ATOMIC); @@ -270,7 +270,7 @@ static struct hlist_head *nl_pid_hash_zalloc(size_t size) get_order(size)); } -static void nl_pid_hash_free(struct hlist_head *table, size_t size) +static void nl_portid_hash_free(struct hlist_head *table, size_t size) { if (size <= PAGE_SIZE) kfree(table); @@ -278,7 +278,7 @@ static void nl_pid_hash_free(struct hlist_head *table, size_t size) free_pages((unsigned long)table, get_order(size)); } -static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) +static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) { unsigned int omask, mask, shift; size_t osize, size; @@ -296,7 +296,7 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) size *= 2; } - table = nl_pid_hash_zalloc(size); + table = nl_portid_hash_zalloc(size); if (!table) return 0; @@ -311,23 +311,23 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) struct hlist_node *node, *tmp; sk_for_each_safe(sk, node, tmp, &otable[i]) - __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); + __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } - nl_pid_hash_free(otable, osize); + nl_portid_hash_free(otable, osize); hash->rehash_time = jiffies + 10 * 60 * HZ; return 1; } -static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) +static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) { int avg = hash->entries >> hash->shift; - if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) + if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) return 1; if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { - nl_pid_hash_rehash(hash, 0); + nl_portid_hash_rehash(hash, 0); return 1; } @@ -356,9 +356,9 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -366,10 +366,10 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) int len; netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, node, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) + if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } @@ -377,17 +377,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) goto err; err = -EBUSY; - if (nlk_sk(sk)->pid) + if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) goto err; - if (len && nl_pid_hash_dilute(hash, len)) - head = nl_pid_hashfn(hash, pid); + if (len && nl_portid_hash_dilute(hash, len)) + head = nl_portid_hashfn(hash, portid); hash->entries++; - nlk_sk(sk)->pid = pid; + nlk_sk(sk)->portid = portid; sk_add_node(sk, head); err = 0; @@ -518,11 +518,11 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid) { + if (nlk->portid) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, - .pid = nlk->pid, + .portid = nlk->portid, }; atomic_notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); @@ -559,24 +559,24 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; struct hlist_node *node; - s32 pid = task_tgid_vnr(current); + s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; retry: cond_resched(); netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(osk, node, head) { if (!net_eq(sock_net(osk), net)) continue; - if (nlk_sk(osk)->pid == pid) { - /* Bind collision, search negative pid values. */ - pid = rover--; + if (nlk_sk(osk)->portid == portid) { + /* Bind collision, search negative portid values. */ + portid = rover--; if (rover > -4097) rover = -4097; netlink_table_ungrab(); @@ -585,7 +585,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, net, pid); + err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) goto retry; @@ -668,8 +668,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->pid) { - if (nladdr->nl_pid != nlk->pid) + if (nlk->portid) { + if (nladdr->nl_pid != nlk->portid) return -EINVAL; } else { err = nladdr->nl_pid ? @@ -715,7 +715,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_pid = 0; + nlk->dst_portid = 0; nlk->dst_group = 0; return 0; } @@ -726,12 +726,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->pid) + if (!nlk->portid) err = netlink_autobind(sock); if (err == 0) { sk->sk_state = NETLINK_CONNECTED; - nlk->dst_pid = nladdr->nl_pid; + nlk->dst_portid = nladdr->nl_pid; nlk->dst_group = ffs(nladdr->nl_groups); } @@ -750,10 +750,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*nladdr); if (peer) { - nladdr->nl_pid = nlk->dst_pid; + nladdr->nl_pid = nlk->dst_portid; nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { - nladdr->nl_pid = nlk->pid; + nladdr->nl_pid = nlk->portid; nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; } return 0; @@ -772,19 +772,19 @@ static void netlink_overrun(struct sock *sk) atomic_inc(&sk->sk_drops); } -static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) +static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); + sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid) { + nlk->dst_portid != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -935,7 +935,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, } int netlink_unicast(struct sock *ssk, struct sk_buff *skb, - u32 pid, int nonblock) + u32 portid, int nonblock) { struct sock *sk; int err; @@ -945,7 +945,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, timeo = sock_sndtimeo(ssk, nonblock); retry: - sk = netlink_getsockbypid(ssk, pid); + sk = netlink_getsockbyportid(ssk, portid); if (IS_ERR(sk)) { kfree_skb(skb); return PTR_ERR(sk); @@ -1005,7 +1005,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_broadcast_data { struct sock *exclude_sk; struct net *net; - u32 pid; + u32 portid; u32 group; int failure; int delivery_failure; @@ -1026,7 +1026,7 @@ static int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1078,7 +1078,7 @@ out: return 0; } -int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data) @@ -1092,7 +1092,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, info.exclude_sk = ssk; info.net = net; - info.pid = pid; + info.portid = portid; info.group = group; info.failure = 0; info.delivery_failure = 0; @@ -1130,17 +1130,17 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, } EXPORT_SYMBOL(netlink_broadcast_filtered); -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation) { - return netlink_broadcast_filtered(ssk, skb, pid, group, allocation, + return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, NULL, NULL); } EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { struct sock *exclude_sk; - u32 pid; + u32 portid; u32 group; int code; }; @@ -1156,7 +1156,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1174,14 +1174,14 @@ out: /** * netlink_set_err - report error to broadcast listeners * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() - * @pid: the PID of a process that we want to skip (if any) + * @portid: the PORTID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; @@ -1189,7 +1189,7 @@ int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) int ret = 0; info.exclude_sk = ssk; - info.pid = pid; + info.portid = portid; info.group = group; /* sk->sk_err wants a positive error value */ info.code = -code; @@ -1354,7 +1354,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr = msg->msg_name; - u32 dst_pid; + u32 dst_portid; u32 dst_group; struct sk_buff *skb; int err; @@ -1374,18 +1374,18 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = -EINVAL; if (addr->nl_family != AF_NETLINK) goto out; - dst_pid = addr->nl_pid; + dst_portid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if ((dst_group || dst_pid) && + if ((dst_group || dst_portid) && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { - dst_pid = nlk->dst_pid; + dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; } - if (!nlk->pid) { + if (!nlk->portid) { err = netlink_autobind(sock); if (err) goto out; @@ -1399,7 +1399,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - NETLINK_CB(skb).pid = nlk->pid; + NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; @@ -1417,9 +1417,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } - err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: scm_destroy(siocb->scm); @@ -1482,7 +1482,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; addr->nl_family = AF_NETLINK; addr->nl_pad = 0; - addr->nl_pid = NETLINK_CB(skb).pid; + addr->nl_pid = NETLINK_CB(skb).portid; addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } @@ -1683,7 +1683,7 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) } struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = NLMSG_LENGTH(len); @@ -1692,7 +1692,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; - nlh->nlmsg_pid = pid; + nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); @@ -1788,7 +1788,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1836,7 +1836,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) sk = netlink_lookup(sock_net(in_skb->sk), in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).pid); + NETLINK_CB(in_skb).portid); if (sk) { sk->sk_err = ENOBUFS; sk->sk_error_report(sk); @@ -1845,12 +1845,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) return; } - rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); - netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); @@ -1900,33 +1900,33 @@ EXPORT_SYMBOL(netlink_rcv_skb); * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message - * @pid: destination netlink pid for reports or 0 + * @portid: destination netlink portid for reports or 0 * @group: destination multicast group or 0 * @report: 1 to report back, 0 to disable * @flags: allocation flags */ -int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags) { int err = 0; if (group) { - int exclude_pid = 0; + int exclude_portid = 0; if (report) { atomic_inc(&skb->users); - exclude_pid = pid; + exclude_portid = portid; } /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ - err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); + err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); } if (report) { int err2; - err2 = nlmsg_unicast(sk, skb, pid); + err2 = nlmsg_unicast(sk, skb, portid); if (!err || err == -ESRCH) err = err2; } @@ -1951,7 +1951,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { @@ -1999,7 +1999,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) j = iter->hash_idx + 1; do { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); @@ -2038,7 +2038,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", s, s->sk_protocol, - nlk->pid, + nlk->portid, nlk->groups ? (u32)nlk->groups[0] : 0, sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), @@ -2183,12 +2183,12 @@ static int __init netlink_proto_init(void) order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; - hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); + hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) - nl_pid_hash_free(nl_table[i].hash.table, + nl_portid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table)); kfree(nl_table); goto panic; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 19288b7d6135..f2aabb6f4105 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -501,7 +501,7 @@ EXPORT_SYMBOL(genl_unregister_family); /** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message - * @pid: netlink pid the message is addressed to + * @portid: netlink portid the message is addressed to * @seq: sequence number (usually the one of the sender) * @family: generic netlink family * @flags: netlink message flags @@ -509,13 +509,13 @@ EXPORT_SYMBOL(genl_unregister_family); * * Returns pointer to user specific header */ -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; - nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN + + nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN + family->hdrsize, flags); if (nlh == NULL) return NULL; @@ -585,7 +585,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } info.snd_seq = nlh->nlmsg_seq; - info.snd_pid = NETLINK_CB(skb).pid; + info.snd_portid = NETLINK_CB(skb).portid; info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; @@ -626,12 +626,12 @@ static struct genl_family genl_ctrl = { .netnsok = true, }; -static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, +static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -701,7 +701,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, +static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { @@ -709,7 +709,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, struct nlattr *nla_grps; struct nlattr *nest; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -756,7 +756,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++n < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, CTRL_CMD_NEWFAMILY) < 0) goto errout; @@ -773,7 +773,7 @@ errout: } static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -782,7 +782,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_info(family, pid, seq, 0, skb, cmd); + err = ctrl_fill_info(family, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -792,7 +792,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -801,7 +801,7 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -853,7 +853,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return -ENOENT; } - msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq, CTRL_CMD_NEWFAMILY); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -971,7 +971,7 @@ problem: subsys_initcall(genl_init); -static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, +static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { struct sk_buff *tmp; @@ -986,7 +986,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, - pid, group, flags); + portid, group, flags); if (err) goto error; } @@ -994,20 +994,20 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, prev = net; } - return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); error: kfree_skb(skb); return err; } -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_mcast(skb, pid, group, flags); + return genlmsg_mcast(skb, portid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, +void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; @@ -1016,6 +1016,6 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, if (nlh) report = nlmsg_report(nlh); - nlmsg_notify(sk, skb, pid, group, report, flags); + nlmsg_notify(sk, skb, portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 4c51714ee741..4bbb70e32d1e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -58,7 +58,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, { void *hdr; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) return -EMSGSIZE; @@ -165,7 +165,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) @@ -347,13 +347,13 @@ free_msg: } static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, - u32 pid, u32 seq, + u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; - hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags, + hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) return -EMSGSIZE; @@ -401,7 +401,7 @@ static int nfc_genl_dump_devices(struct sk_buff *skb, while (dev) { int rc; - rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).pid, + rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; @@ -520,7 +520,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) goto out_putdev; } - rc = nfc_genl_send_device(msg, dev, info->snd_pid, info->snd_seq, + rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; @@ -611,7 +611,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) - dev->genl_data.poll_req_pid = info->snd_pid; + dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -645,13 +645,13 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) mutex_lock(&dev->genl_data.genl_data_mutex); - if (dev->genl_data.poll_req_pid != info->snd_pid) { + if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -771,15 +771,15 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this, if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; - pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); + pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { - if (dev->genl_data.poll_req_pid == n->pid) { + if (dev->genl_data.poll_req_portid == n->portid) { nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; } dev = nfc_device_iter_next(&iter); } @@ -792,7 +792,7 @@ out: void nfc_genl_data_init(struct nfc_genl_data *genl_data) { - genl_data->poll_req_pid = 0; + genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 0da687769f56..c7425f32e7db 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -286,7 +286,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = &OVS_CB(skb)->flow->key; upcall.userdata = NULL; - upcall.pid = 0; + upcall.portid = 0; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -296,7 +296,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.pid = nla_get_u32(a); + upcall.portid = nla_get_u32(a); break; } } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 105a0b5adc51..56327e877ed9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -225,7 +225,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.pid = p->upcall_pid; + upcall.portid = p->upcall_portid; ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -261,7 +261,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, int dp_ifindex; int err; - if (upcall_info->pid == 0) { + if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } @@ -395,7 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); - err = genlmsg_unicast(net, user_skb, upcall_info->pid); + err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: kfree_skb(nskb); @@ -780,7 +780,7 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { /* Called with genl_lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, - struct sk_buff *skb, u32 pid, + struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { const int skb_orig_len = skb->len; @@ -795,7 +795,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_protected(flow->sf_acts, lockdep_genl_is_held()); - ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd); + ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -879,7 +879,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 pid, u32 seq, u8 cmd) + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -888,7 +888,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; } @@ -970,7 +970,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->hash = ovs_flow_hash(&key, key_len); ovs_flow_tbl_insert(table, flow); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); } else { @@ -1008,7 +1008,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_flow_deferred_free_acts(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); /* Clear stats. */ @@ -1020,7 +1020,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } if (!IS_ERR(reply)) - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); else @@ -1061,7 +1061,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!flow) return -ENOENT; - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1103,13 +1103,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid, + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); ovs_flow_deferred_free(flow); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; } @@ -1137,7 +1137,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; if (ovs_flow_cmd_fill_info(flow, dp, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) break; @@ -1191,13 +1191,13 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = { }; static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, flags, cmd); if (!ovs_header) goto error; @@ -1222,7 +1222,7 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1232,7 +1232,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1311,7 +1311,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); vport = new_vport(&parms); if (IS_ERR(vport)) { @@ -1322,7 +1322,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1332,7 +1332,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) list_add_tail(&dp->list_node, &ovs_net->dps); rtnl_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; @@ -1394,7 +1394,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return err; - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1402,7 +1402,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) __dp_destroy(dp); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1419,7 +1419,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); @@ -1428,7 +1428,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) return 0; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1444,7 +1444,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1461,7 +1461,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && - ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, + ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_DP_CMD_NEW) < 0) break; @@ -1521,13 +1521,13 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = { /* Called with RTNL lock or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -1537,7 +1537,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid)) + nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1559,7 +1559,7 @@ error: } /* Called with RTNL lock or RCU read lock. */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1569,7 +1569,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1661,21 +1661,21 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); ovs_dp_detach_port(vport); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1707,9 +1707,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (err) goto exit_unlock; if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { netlink_set_err(sock_net(skb->sk)->genl_sock, 0, @@ -1717,7 +1717,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1743,7 +1743,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1751,7 +1751,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_dp_detach_port(vport); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1773,7 +1773,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1808,7 +1808,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_VPORT_CMD_NEW) < 0) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 129ec5480758..031dfbf37c93 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -129,7 +129,7 @@ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; const struct nlattr *userdata; - u32 pid; + u32 portid; }; static inline struct net *ovs_dp_get_net(struct datapath *dp) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 1abd9609ba78..9055dd698c70 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -125,7 +125,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_pid = parms->upcall_pid; + vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index c56e4836e93b..3f7961ea3c56 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -70,7 +70,7 @@ struct vport_err_stats { * @rcu: RCU callback head for deferred destruction. * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. - * @upcall_pid: The Netlink port to use for packets received on this port that + * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. @@ -83,7 +83,7 @@ struct vport { struct rcu_head rcu; u16 port_no; struct datapath *dp; - u32 upcall_pid; + u32 upcall_portid; struct hlist_node hash_node; struct hlist_node dp_hash_node; @@ -113,7 +113,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_pid; + u32 upcall_portid; }; /** diff --git a/net/packet/diag.c b/net/packet/diag.c index 39bce0d50df9..8db6e21c46bd 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -126,13 +126,13 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct packet_diag_msg *rp; struct packet_sock *po = pkt_sk(sk); - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); if (!nlh) return -EMSGSIZE; @@ -184,7 +184,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (num < s_num) goto next; - if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid, + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, sock_i_ino(sk)) < 0) goto done; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 7dd762a464e5..83a8389619aa 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -33,7 +33,7 @@ /* Device address handling */ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event); + u32 portid, u32 seq, int event); void phonet_address_notify(int event, struct net_device *dev, u8 addr) { @@ -101,12 +101,12 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) } static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -148,7 +148,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; if (fill_addr(skb, pnd->netdev, addr << 2, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) goto out; } @@ -165,12 +165,12 @@ out: /* Routes handling */ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -276,7 +276,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid, + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE)) goto out; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e3d2c78cb52c..102761d294cb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -644,7 +644,7 @@ errout: } static int -tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, +tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, u16 flags, int event, int bind, int ref) { struct tcamsg *t; @@ -652,7 +652,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); @@ -678,7 +678,7 @@ out_nlmsg_trim: } static int -act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, +act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; @@ -686,16 +686,16 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnl_unicast(skb, net, pid); + return rtnl_unicast(skb, net, portid); } static struct tc_action * -tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) { struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; @@ -762,7 +762,7 @@ static struct tc_action *create_a(int i) } static int tca_action_flush(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 pid) + struct nlmsghdr *n, u32 portid) { struct sk_buff *skb; unsigned char *b; @@ -799,7 +799,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a->ops == NULL) goto err_out; - nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); if (!nlh) goto out_module_put; t = nlmsg_data(nlh); @@ -823,7 +823,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; @@ -841,7 +841,7 @@ noflush_out: static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int event) + u32 portid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -853,13 +853,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(net, tb[1], n, pid); + return tca_action_flush(net, tb[1], n, portid); else return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_get_1(tb[i], n, pid); + act = tcf_action_get_1(tb[i], n, portid); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; @@ -874,7 +874,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } if (event == RTM_GETACTION) - ret = act_get_notify(net, pid, n, head, event); + ret = act_get_notify(net, portid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -884,7 +884,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } - if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event, + if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event, 0, 1) <= 0) { kfree_skb(skb); ret = -EINVAL; @@ -893,7 +893,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (ret > 0) return 0; @@ -905,7 +905,7 @@ err: } static int tcf_add_notify(struct net *net, struct tc_action *a, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -920,7 +920,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_kfree_skb; t = nlmsg_data(nlh); @@ -940,7 +940,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -953,7 +953,7 @@ out_kfree_skb: static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int ovr) + u32 portid, int ovr) { int ret = 0; struct tc_action *act; @@ -971,7 +971,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -984,7 +984,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); @@ -1008,17 +1008,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_DELACTION); + portid, RTM_DELACTION); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_GETACTION); + portid, RTM_GETACTION); break; default: BUG(); @@ -1085,7 +1085,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) goto out_module_put; } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; @@ -1109,7 +1109,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_cancel(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; - if (NETLINK_CB(cb->skb).pid && ret) + if (NETLINK_CB(cb->skb).portid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); return skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc3ef5aef355..7ae02892437c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -343,13 +343,13 @@ errout: } static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, - unsigned long fh, u32 pid, u32 seq, u16 flags, int event) + unsigned long fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -381,18 +381,18 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, unsigned long fh, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -407,7 +407,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, { struct tcf_dump_args *a = (void *)arg; - return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, + return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } @@ -465,7 +465,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, + if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a08b4ab3e421..a18d975db59c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1185,7 +1185,7 @@ graft: } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1193,7 +1193,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1248,25 +1248,25 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old)) { - if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new)) { - if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } if (skb->len) - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: @@ -1289,7 +1289,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; } else { if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1300,7 +1300,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, continue; } if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1375,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 pid = tcm->tcm_parent; + u32 portid = tcm->tcm_parent; u32 clid = tcm->tcm_handle; u32 qid = TC_H_MAJ(clid); int err; @@ -1403,8 +1403,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - if (pid != TC_H_ROOT) { - u32 qid1 = TC_H_MAJ(pid); + if (portid != TC_H_ROOT) { + u32 qid1 = TC_H_MAJ(portid); if (qid && qid1) { /* If both majors are known, they must be identical. */ @@ -1418,10 +1418,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now qid is genuine qdisc handle consistent * both with parent and child. * - * TC_H_MAJ(pid) still may be unspecified, complete it now. + * TC_H_MAJ(portid) still may be unspecified, complete it now. */ - if (pid) - pid = TC_H_MAKE(qid, pid); + if (portid) + portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) qid = dev->qdisc->handle; @@ -1439,7 +1439,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now try to get class */ if (clid == 0) { - if (pid == TC_H_ROOT) + if (portid == TC_H_ROOT) clid = qid; } else clid = TC_H_MAKE(qid, clid); @@ -1478,7 +1478,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) new_cl = cl; err = -EOPNOTSUPP; if (cops->change) - err = cops->change(q, clid, pid, tca, &new_cl); + err = cops->change(q, clid, portid, tca, &new_cl); if (err == 0) tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); @@ -1492,7 +1492,7 @@ out: static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1500,7 +1500,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1540,18 +1540,18 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, unsigned long cl, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) { + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -1565,7 +1565,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; - return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid, + return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 47a839df27dc..6675914dc592 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid); + genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); } return 0; diff --git a/net/unix/diag.c b/net/unix/diag.c index 750b13408449..06748f108a57 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -110,12 +110,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), flags); if (!nlh) return -EMSGSIZE; @@ -159,7 +159,7 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -170,7 +170,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -200,7 +200,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(req->udiag_states & (1 << sk->sk_state))) goto next; if (sk_diag_dump(sk, skb, req, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) goto done; @@ -267,7 +267,7 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); @@ -277,7 +277,7 @@ again: goto again; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index bc7430b54771..a343be4a52bd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -55,7 +55,7 @@ struct cfg80211_registered_device { int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; - u32 ap_beacons_nlpid; + u32 ap_beacons_nlportid; /* protected by RTNL only */ int num_running_ifaces; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8fd0242ee169..ec7fcee5bad6 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -615,7 +615,7 @@ EXPORT_SYMBOL(cfg80211_del_sta); struct cfg80211_mgmt_registration { struct list_head list; - u32 nlpid; + u32 nlportid; int match_len; @@ -624,7 +624,7 @@ struct cfg80211_mgmt_registration { u8 match[]; }; -int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, int match_len) { @@ -672,7 +672,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; - nreg->nlpid = snd_pid; + nreg->nlportid = snd_portid; nreg->frame_type = cpu_to_le16(frame_type); list_add(&nreg->list, &wdev->mgmt_registrations); @@ -685,7 +685,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, return err; } -void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); @@ -694,7 +694,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_lock_bh(&wdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { - if (reg->nlpid != nlpid) + if (reg->nlportid != nlportid) continue; if (rdev->ops->mgmt_frame_register) { @@ -710,8 +710,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_unlock_bh(&wdev->mgmt_registrations_lock); - if (nlpid == wdev->ap_unexpected_nlpid) - wdev->ap_unexpected_nlpid = 0; + if (nlportid == wdev->ap_unexpected_nlportid) + wdev->ap_unexpected_nlportid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) @@ -872,7 +872,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_mgmt(rdev, wdev, reg->nlpid, + if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, freq, sig_mbm, buf, len, gfp)) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 787aeaa902fe..34eb5c07a567 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -496,11 +496,11 @@ static bool is_valid_ie_attr(const struct nlattr *attr) } /* message building helper */ -static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ - return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd); + return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd); } static int nl80211_msg_put_channel(struct sk_buff *msg, @@ -851,7 +851,7 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { void *hdr; @@ -866,7 +866,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -1; @@ -1267,7 +1267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++idx <= start) continue; - if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) { idx--; @@ -1290,7 +1290,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -1736,14 +1736,14 @@ static inline u64 wdev_id(struct wireless_dev *wdev) ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); } -static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) return -1; @@ -1807,7 +1807,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; continue; } - if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { mutex_unlock(&rdev->devlist_mtx); @@ -1838,7 +1838,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, dev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2056,7 +2056,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) break; } - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2191,7 +2191,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2769,7 +2769,7 @@ nla_put_failure: return false; } -static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -2778,7 +2778,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -2931,7 +2931,7 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; if (nl80211_send_station(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev, netdev, mac_addr, &sinfo) < 0) @@ -2977,7 +2977,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3303,7 +3303,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr); } -static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) @@ -3311,7 +3311,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *pinfoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -3389,7 +3389,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, dst, next_hop, &pinfo) < 0) @@ -3438,7 +3438,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, next_hop, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3679,7 +3679,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; @@ -3998,7 +3998,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) goto put_failure; @@ -4616,7 +4616,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, ASSERT_WDEV_LOCK(wdev); - hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).pid, seq, flags, + hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); if (!hdr) return -1; @@ -4735,14 +4735,14 @@ static int nl80211_dump_scan(struct sk_buff *skb, return skb->len; } -static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, struct survey_info *survey) { void *hdr; struct nlattr *infoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) return -ENOMEM; @@ -4836,7 +4836,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, } if (nl80211_send_survey(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, &survey) < 0) @@ -5451,7 +5451,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } while (1) { - void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).pid, + void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, NL80211_CMD_TESTMODE); struct nlattr *tmdata; @@ -5491,7 +5491,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, static struct sk_buff * __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 pid, u32 seq, gfp_t gfp) + int approxlen, u32 portid, u32 seq, gfp_t gfp) { struct sk_buff *skb; void *hdr; @@ -5501,7 +5501,7 @@ __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, if (!skb) return NULL; - hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE); + hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); if (!hdr) { kfree_skb(skb); return NULL; @@ -5531,7 +5531,7 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, return NULL; return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_pid, + rdev->testmode_info->snd_portid, rdev->testmode_info->snd_seq, GFP_KERNEL); } @@ -5867,7 +5867,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); if (IS_ERR(hdr)) { @@ -6086,7 +6086,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type, + return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); } @@ -6167,7 +6167,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_FRAME); if (IS_ERR(hdr)) { @@ -6284,7 +6284,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_POWER_SAVE); if (!hdr) { err = -ENOBUFS; @@ -6486,7 +6486,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_WOWLAN); if (!hdr) goto nla_put_failure; @@ -6760,10 +6760,10 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb, wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; - if (wdev->ap_unexpected_nlpid) + if (wdev->ap_unexpected_nlportid) return -EBUSY; - wdev->ap_unexpected_nlpid = info->snd_pid; + wdev->ap_unexpected_nlportid = info->snd_portid; return 0; } @@ -6793,7 +6793,7 @@ static int nl80211_probe_client(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_PROBE_CLIENT); if (IS_ERR(hdr)) { @@ -6828,10 +6828,10 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS)) return -EOPNOTSUPP; - if (rdev->ap_beacons_nlpid) + if (rdev->ap_beacons_nlportid) return -EBUSY; - rdev->ap_beacons_nlpid = info->snd_pid; + rdev->ap_beacons_nlportid = info->snd_portid; return 0; } @@ -7628,12 +7628,12 @@ static int nl80211_add_scan_req(struct sk_buff *msg, static int nl80211_send_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -7657,11 +7657,11 @@ static int nl80211_send_sched_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct net_device *netdev, - u32 pid, u32 seq, int flags, u32 cmd) + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -8370,9 +8370,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct sk_buff *msg; void *hdr; int err; - u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid); + u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); - if (!nlpid) + if (!nlportid) return false; msg = nlmsg_new(100, gfp); @@ -8396,7 +8396,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; } - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; nla_put_failure: @@ -8420,7 +8420,7 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev, } int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlportid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp) { @@ -8449,7 +8449,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, genlmsg_end(msg, hdr); - return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); nla_put_failure: genlmsg_cancel(msg, hdr); @@ -8804,9 +8804,9 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid); + u32 nlportid = ACCESS_ONCE(rdev->ap_beacons_nlportid); - if (!nlpid) + if (!nlportid) return; msg = nlmsg_new(len + 100, gfp); @@ -8829,7 +8829,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, genlmsg_end(msg, hdr); - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return; nla_put_failure: @@ -8853,9 +8853,9 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) - cfg80211_mlme_unregister_socket(wdev, notify->pid); - if (rdev->ap_beacons_nlpid == notify->pid) - rdev->ap_beacons_nlpid = 0; + cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (rdev->ap_beacons_nlportid == notify->portid) + rdev->ap_beacons_nlportid = 0; } rcu_read_unlock(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7856c33898fa..30edad44e7fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -166,7 +166,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) { @@ -1674,13 +1674,13 @@ void km_state_notify(struct xfrm_state *x, const struct km_event *c) EXPORT_SYMBOL(km_policy_notify); EXPORT_SYMBOL(km_state_notify); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); @@ -1726,13 +1726,13 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) } EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 354070adb5ef..b313d932d678 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -603,7 +603,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, } c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -676,7 +676,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -826,7 +826,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -904,7 +904,7 @@ static inline size_t xfrm_spdinfo_msgsize(void) } static int build_spdinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -913,7 +913,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -946,17 +946,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -967,7 +967,7 @@ static inline size_t xfrm_sadinfo_msgsize(void) } static int build_sadinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -975,7 +975,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -1003,17 +1003,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1033,7 +1033,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1114,7 +1114,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1401,7 +1401,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); @@ -1486,7 +1486,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -1621,7 +1621,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, err = PTR_ERR(resp_skb); } else { err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); } } else { uid_t loginuid = audit_get_loginuid(current); @@ -1638,7 +1638,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } @@ -1668,7 +1668,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_state_notify(NULL, &c); @@ -1695,7 +1695,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) return -EMSGSIZE; @@ -1777,11 +1777,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, spin_lock_bh(&x->lock); c.data.aevent = p->flags; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1827,7 +1827,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.data.aevent = XFRM_AE_CU; km_state_notify(x, &c); err = 0; @@ -1862,7 +1862,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_policy_notify(NULL, 0, &c); return 0; @@ -2370,7 +2370,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2429,7 +2429,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; @@ -2497,7 +2497,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2696,7 +2696,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2756,7 +2756,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2810,7 +2810,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; -- cgit v1.2.3 From 7b281ee026552f10862b617a2a51acf49c829554 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Sep 2012 15:38:32 -0400 Subject: NFS: fsync() must exit with an error if page writeback failed We need to ensure that if the call to filemap_write_and_wait_range() fails, then we report that error back to the application. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 +++- fs/nfs/nfs4file.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 75d6d0a3d32e..6a7fcab7ecb3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -287,10 +287,12 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file->f_path.dentry->d_inode; ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + goto out; mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); mutex_unlock(&inode->i_mutex); - +out: return ret; } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index acb65e7887f8..eb5eb8eef4d3 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -96,13 +96,15 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file->f_path.dentry->d_inode; ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + goto out; mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); if (!ret && !datasync) /* application has asked for meta-data sync */ ret = pnfs_layoutcommit_inode(inode, true); mutex_unlock(&inode->i_mutex); - +out: return ret; } -- cgit v1.2.3 From 09e05d4805e6c524c1af74e524e5d0528bb3fef3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Jul 2012 23:16:25 +0200 Subject: jbd: Fix assertion failure in commit code due to lacking transaction credits ext3 users of data=journal mode with blocksize < pagesize were occasionally hitting assertion failure in journal_commit_transaction() checking whether the transaction has at least as many credits reserved as buffers attached. The core of the problem is that when a file gets truncated, buffers that still need checkpointing or that are attached to the committing transaction are left with buffer_mapped set. When this happens to buffers beyond i_size attached to a page stradding i_size, subsequent write extending the file will see these buffers and as they are mapped (but underlying blocks were freed) things go awry from here. The assertion failure just coincidentally (and in this case luckily as we would start corrupting filesystem) triggers due to journal_head not being properly cleaned up as well. Under some rare circumstances this bug could even hit data=ordered mode users. There the assertion won't trigger and we would end up corrupting the filesystem. We fix the problem by unmapping buffers if possible (in lots of cases we just need a buffer attached to a transaction as a place holder but it must not be written out anyway). And in one case, we just have to bite the bullet and wait for transaction commit to finish. Reviewed-by: Josef Bacik Signed-off-by: Jan Kara --- fs/jbd/commit.c | 45 +++++++++++++++++++++++++++--------- fs/jbd/transaction.c | 64 ++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 78 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 52c15c776029..86b39b167c23 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -86,7 +86,12 @@ nope: static void release_data_buffer(struct buffer_head *bh) { if (buffer_freed(bh)) { + WARN_ON_ONCE(buffer_dirty(bh)); clear_buffer_freed(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; release_buffer_page(bh); } else put_bh(bh); @@ -866,17 +871,35 @@ restart_loop: * there's no point in keeping a checkpoint record for * it. */ - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future after the "add to orphan" - * operation been committed, That's not only a performance - * gain, it also stops aliasing problems if the buffer is - * left behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh) && !jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + /* + * A buffer which has been freed while still being journaled by + * a previous transaction. + */ + if (buffer_freed(bh)) { + /* + * If the running transaction is the one containing + * "add to orphan" operation (b_next_transaction != + * NULL), we have to wait for that transaction to + * commit before we can really get rid of the buffer. + * So just clear b_modified to not confuse transaction + * credit accounting and refile the buffer to + * BJ_Forget of the running transaction. If the just + * committed transaction contains "add to orphan" + * operation, we can completely invalidate the buffer + * now. We are rather throughout in that since the + * buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial + * page. + */ + jh->b_modified = 0; + if (!jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index febc10db5ced..78b7f84241d4 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1843,15 +1843,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * We're outside-transaction here. Either or both of j_running_transaction * and j_committing_transaction may be NULL. */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + int partial_page) { transaction_t *transaction; struct journal_head *jh; int may_free = 1; - int ret; BUFFER_TRACE(bh, "entry"); +retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1879,10 +1880,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * clear the buffer dirty bit at latest at the moment when the * transaction marking the buffer as freed in the filesystem * structures is committed because from that moment on the - * buffer can be reallocated and used by a different page. + * block can be reallocated and used by a different page. * Since the block hasn't been freed yet but the inode has * already been added to orphan list, it is safe for us to add * the buffer to BJ_Forget list of the newest transaction. + * + * Also we have to clear buffer_mapped flag of a truncated buffer + * because the buffer_head may be attached to the page straddling + * i_size (can happen only when blocksize < pagesize) and thus the + * buffer_head can be reused when the file is extended again. So we end + * up keeping around invalidated buffers attached to transactions' + * BJ_Forget list just to stop checkpointing code from cleaning up + * the transaction this buffer was modified in. */ transaction = jh->b_transaction; if (transaction == NULL) { @@ -1909,13 +1918,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * committed, the buffer won't be needed any * longer. */ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_running_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* There is no currently-running transaction. So the * orphan record which we wrote for this file must have @@ -1923,13 +1928,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * the committing transaction, if it exists. */ if (journal->j_committing_transaction) { JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_committing_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* The orphan record's transaction has * committed. We can cleanse this buffer */ @@ -1950,10 +1951,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } /* * The buffer is committing, we simply cannot touch - * it. So we just set j_next_transaction to the - * running transaction (if there is one) and mark - * buffer as freed so that commit code knows it should - * clear dirty bits when it is done with the buffer. + * it. If the page is straddling i_size we have to wait + * for commit and try again. + */ + if (partial_page) { + tid_t tid = journal->j_committing_transaction->t_tid; + + journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + log_wait_commit(journal, tid); + goto retry; + } + /* + * OK, buffer won't be reachable after truncate. We just set + * j_next_transaction to the running transaction (if there is + * one) and mark buffer as freed so that commit code knows it + * should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) @@ -1976,6 +1991,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: + /* + * This is tricky. Although the buffer is truncated, it may be reused + * if blocksize < pagesize and it is attached to the page straddling + * EOF. Since the buffer might have been added to BJ_Forget list of the + * running transaction, journal_get_write_access() won't clear + * b_modified and credit accounting gets confused. So clear b_modified + * here. */ + jh->b_modified = 0; journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); @@ -2024,7 +2047,8 @@ void journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); + may_free &= journal_unmap_buffer(journal, bh, + offset > 0); unlock_buffer(bh); } curr_off = next_off; -- cgit v1.2.3 From 93f9052643409c13b3b5f76833865087351f55b8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 12 Sep 2012 14:32:42 -0400 Subject: ext4: set bg_itable_unused when resizing Set bg_itable_unused for file systems that have uninit_bg enabled. This will speed up the first e2fsck run after the file system is resized. Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 7adc08854588..a5be589c85bc 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1268,6 +1268,9 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, ext4_free_group_clusters_set(sb, gdp, EXT4_B2C(sbi, group_data->free_blocks_count)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); + if (ext4_has_group_desc_csum(sb)) + ext4_itable_unused_set(sb, gdp, + EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags = cpu_to_le16(*bg_flags); ext4_group_desc_csum_set(sb, group, gdp); -- cgit v1.2.3 From da1dfb6af849cb05aa82b0c18866a7b2bafb6905 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 26 Jul 2012 11:30:54 +0100 Subject: GFS2: Make write size hinting code common This collects up the write size hinting code which is used by the block reservation subsystem into a single function. At the same time this also corrects the rounding for this calculation. Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index d1d791ef38de..382000ffac1f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -322,6 +322,29 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOTTY; } +/** + * gfs2_size_hint - Give a hint to the size of a write request + * @file: The struct file + * @offset: The file offset of the write + * @size: The length of the write + * + * When we are about to do a write, this function records the total + * write size in order to provide a suitable hint to the lower layers + * about how many blocks will be required. + * + */ + +static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) +{ + struct inode *inode = filep->f_dentry->d_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_inode *ip = GFS2_I(inode); + size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; + int hint = min_t(size_t, INT_MAX, blks); + + atomic_set(&ip->i_res->rs_sizehint, hint); +} + /** * gfs2_allocate_page_backing - Use bmap to allocate blocks * @page: The (locked) page to allocate backing for @@ -382,8 +405,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) return ret; - atomic_set(&ip->i_res->rs_sizehint, - PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift); + gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); @@ -663,7 +685,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret) return ret; - atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift); + gfs2_size_hint(file, pos, writesize); + if (file->f_flags & O_APPEND) { struct gfs2_holder gh; @@ -789,7 +812,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (unlikely(error)) goto out_uninit; - atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift); + gfs2_size_hint(file, offset, len); while (len > 0) { if (len < bytes) -- cgit v1.2.3 From 645b2ccc75d3d15928e3dbfc659659c2b8d4d9a6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 26 Jul 2012 14:15:17 +0100 Subject: GFS2: Fix missing allocation data for set/remove xattr These entry points were missed in the original patch to allocate this data structure. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 4ce22e547308..753af3d86bbc 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1722,7 +1722,9 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = generic_setxattr(dentry, name, data, size, flags); + ret = gfs2_rs_alloc(ip); + if (ret == 0) + ret = generic_setxattr(dentry, name, data, size, flags); gfs2_glock_dq(&gh); } gfs2_holder_uninit(&gh); @@ -1757,7 +1759,9 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = generic_removexattr(dentry, name); + ret = gfs2_rs_alloc(ip); + if (ret == 0) + ret = generic_removexattr(dentry, name); gfs2_glock_dq(&gh); } gfs2_holder_uninit(&gh); -- cgit v1.2.3 From 62e252eeefda62eb8cae9f4286270317ab8d5a42 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 30 Jul 2012 11:06:08 +0100 Subject: GFS2: Take account of blockages when using reserved blocks The claim_reserved_blks() function was not taking account of the possibility of "blockages" while performing allocation. This can be caused by another node allocating something in the same extent which has been reserved locally. This patch tests for this condition and then skips the remainder of the reservation in this case. This is a relatively rare event, so that it should not affect the general performance improvement which the block reservations provide. The claim_reserved_blks() function also appears not to be able to deal with reservations which cross bitmap boundaries, but that can be dealt with in a future patch since we don't generate boundary crossing reservations currently. Signed-off-by: Steven Whitehouse Reported-by: David Teigland Cc: Bob Peterson --- fs/gfs2/rgrp.c | 66 +++++++++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d34887a601d..c9ed814eeb6f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1961,7 +1961,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) * @dinode: 1 if this block is a dinode block, otherwise data block * @nblocks: desired extent length * - * Lay claim to previously allocated block reservation blocks. + * Lay claim to previously reserved blocks. * Returns: Starting block number of the blocks claimed. * Sets *nblocks to the actual extent length allocated. */ @@ -1970,19 +1970,17 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, { struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_rgrpd *rgd = rs->rs_rgd; - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_bitmap *bi; u64 start_block = gfs2_rs_startblk(rs); const unsigned int elen = *nblocks; - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - gfs2_assert_withdraw(sdp, rgd); - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ bi = rs->rs_bi; gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { - /* Make sure the bitmap hasn't changed */ + if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE) + break; gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); rs->rs_biblk++; @@ -1991,20 +1989,12 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, BUG_ON(!rgd->rd_reserved); rgd->rd_reserved--; dinode = false; - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); } - if (!rs->rs_free) { - struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; - + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + if (!rs->rs_free || *nblocks != elen) gfs2_rs_deltree(rs); - /* -nblocks because we haven't returned to do the math yet. - I'm doing the math backwards to prevent negative numbers, - but think of it as: - if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) - rg_mblk_search(rgd, ip); - } + return start_block; } @@ -2037,34 +2027,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, if (ip->i_res->rs_requested == 0) return -ECANCELED; - /* Check if we have a multi-block reservation, and if so, claim the - next free block from it. */ + /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); rgd = ip->i_res->rs_rgd; block = claim_reserved_blks(ip, dinode, nblocks); - } else { - rgd = ip->i_rgd; + if (*nblocks) + goto found_blocks; + } - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; - else - goal = rgd->rd_last_alloc; - - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); - - /* Since all blocks are reserved in advance, this shouldn't - happen */ - if (blk == BFITNOENT) { - printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", - *nblocks); - printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); - goto rgrp_error; - } + rgd = ip->i_rgd; - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + + /* Since all blocks are reserved in advance, this shouldn't happen */ + if (blk == BFITNOENT) { + printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); + printk(KERN_WARNING "FULL=%d\n", + test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + goto rgrp_error; } + + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); +found_blocks: ndata = *nblocks; if (dinode) ndata--; -- cgit v1.2.3 From 1c6bd7173d66b3dfdefcedb38cabc1fb03997509 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Sep 2012 10:19:24 -0400 Subject: ext4: convert file system to meta_bg if needed during resizing If we have run out of reserved gdt blocks, then clear the resize_inode feature and enable the meta_bg feature, so that we can continue resizing the file system seamlessly. Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 133 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index a5be589c85bc..5932ab5ca53f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1756,6 +1756,99 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, return err; } /* ext4_group_extend */ + +static int num_desc_blocks(struct super_block *sb, ext4_group_t groups) +{ + return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); +} + +/* + * Release the resize inode and drop the resize_inode feature if there + * are no more reserved gdt blocks, and then convert the file system + * to enable meta_bg + */ +static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) +{ + handle_t *handle; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + struct ext4_inode_info *ei = 0; + ext4_fsblk_t nr; + int i, ret, err = 0; + int credits = 1; + + ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (es->s_reserved_gdt_blocks) { + ext4_error(sb, "Unexpected non-zero " + "s_reserved_gdt_blocks"); + return -EPERM; + } + if (!inode) { + ext4_error(sb, "Unexpected NULL resize_inode"); + return -EPERM; + } + ei = EXT4_I(inode); + + /* Do a quick sanity check of the resize inode */ + if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) + goto invalid_resize_inode; + for (i = 0; i < EXT4_N_BLOCKS; i++) { + if (i == EXT4_DIND_BLOCK) { + if (ei->i_data[i]) + continue; + else + goto invalid_resize_inode; + } + if (ei->i_data[i]) + goto invalid_resize_inode; + } + credits += 3; /* block bitmap, bg descriptor, resize inode */ + } + + handle = ext4_journal_start_sb(sb, credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_journal_get_write_access(handle, sbi->s_sbh); + if (err) + goto errout; + + EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE); + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + sbi->s_es->s_first_meta_bg = + cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); + + err = ext4_handle_dirty_super(handle, sb); + if (err) { + ext4_std_error(sb, err); + goto errout; + } + + if (inode) { + nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]); + ext4_free_blocks(handle, inode, NULL, nr, 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); + ei->i_data[EXT4_DIND_BLOCK] = 0; + inode->i_blocks = 0; + + err = ext4_mark_inode_dirty(handle, inode); + if (err) + ext4_std_error(sb, err); + } + +errout: + ret = ext4_journal_stop(handle); + if (!err) + err = ret; + return ret; + +invalid_resize_inode: + ext4_error(sb, "corrupted/inconsistent resize inode"); + return -EINVAL; +} + /* * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count * @@ -1772,13 +1865,14 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_grpblk_t add, offset; unsigned long n_desc_blocks; unsigned long o_desc_blocks; - unsigned long desc_blocks; ext4_group_t o_group; ext4_group_t n_group; ext4_fsblk_t o_blocks_count; + ext4_fsblk_t n_blocks_count_retry = 0; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; int meta_bg; +retry: o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) @@ -1798,11 +1892,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); - n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / - EXT4_DESC_PER_BLOCK(sb); - o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); - desc_blocks = n_desc_blocks - o_desc_blocks; + n_desc_blocks = num_desc_blocks(sb, n_group + 1); + o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count); meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); @@ -1812,20 +1903,37 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) "simultaneously"); return -EINVAL; } - if (le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks) { - ext4_warning(sb, - "No reserved GDT blocks, can't resize"); - return -EPERM; + if (n_desc_blocks > o_desc_blocks + + le16_to_cpu(es->s_reserved_gdt_blocks)) { + n_blocks_count_retry = n_blocks_count; + n_desc_blocks = o_desc_blocks + + le16_to_cpu(es->s_reserved_gdt_blocks); + n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); + n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_group--; /* set to last group number */ } - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + + if (!resize_inode) + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); if (IS_ERR(resize_inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(resize_inode); } - } else if (!meta_bg) { - ext4_warning(sb, "File system features do not permit " - "online resize"); - return -EPERM; + } + + if ((!resize_inode && !meta_bg) || n_group == o_group) { + err = ext4_convert_meta_bg(sb, resize_inode); + if (err) + goto out; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } + if (n_blocks_count_retry) { + n_blocks_count = n_blocks_count_retry; + n_blocks_count_retry = 0; + goto retry; + } } /* See if the device is actually as big as what was requested */ @@ -1876,13 +1984,21 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) break; } + if (!err && n_blocks_count_retry) { + n_blocks_count = n_blocks_count_retry; + n_blocks_count_retry = 0; + free_flex_gd(flex_gd); + flex_gd = NULL; + goto retry; + } + out: if (flex_gd) free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); if (test_opt(sb, DEBUG)) - ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " - "upto %llu blocks", o_blocks_count, n_blocks_count); + ext4_msg(sb, KERN_DEBUG, "resized filesystem to %llu", + n_blocks_count); return err; } -- cgit v1.2.3 From 4da4a56e4f83f52d71e2c5fa86fb1ad77be09753 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Sep 2012 10:24:21 -0400 Subject: ext4: log a resize update to the console every 10 seconds For very long online resizes, a periodic update to the console log is helpful for debugging and for progress reporting. Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5932ab5ca53f..3c9367b9bebd 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1869,6 +1869,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_group_t n_group; ext4_fsblk_t o_blocks_count; ext4_fsblk_t n_blocks_count_retry = 0; + unsigned long last_update_time = 0; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; int meta_bg; @@ -1977,6 +1978,13 @@ retry: */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { + if (jiffies - last_update_time > HZ * 10) { + if (last_update_time) + ext4_msg(sb, KERN_INFO, + "resized to %llu blocks", + ext4_blocks_count(es)); + last_update_time = jiffies; + } if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) break; err = ext4_flex_group_add(sb, resize_inode, flex_gd); -- cgit v1.2.3 From 5e7bbef19c8385895cb21c41a88bd937902e6316 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Sep 2012 12:11:40 -0400 Subject: ext4: advertise the fact that the kernel supports meta_bg resizing Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b8de488889d6..eb7722ab771c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2585,10 +2585,12 @@ static struct attribute *ext4_attrs[] = { /* Features this copy of ext4 supports */ EXT4_INFO_ATTR(lazy_itable_init); EXT4_INFO_ATTR(batched_discard); +EXT4_INFO_ATTR(meta_bg_resize); static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), ATTR_LIST(batched_discard), + ATTR_LIST(meta_bg_resize), NULL, }; -- cgit v1.2.3 From 4895768b6aab55bbdbebcf2da090cb1a5ccf5463 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Tue, 11 Sep 2012 16:28:11 -0400 Subject: fs: add missing documentation to simple_xattr functions v2: add function documentation instead of adding a separate file under Documentation/ tj: Updated comment a bit and rolled in Randy's suggestions. Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Cc: Randy Dunlap Signed-off-by: Aristeu Rozanski Signed-off-by: Tejun Heo --- fs/xattr.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index e17e773517ef..f053c1135d0f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -892,8 +892,19 @@ out: } -/* - * xattr SET operation for in-memory/pseudo filesystems +/** + * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems + * @xattrs: target simple_xattr list + * @name: name of the new extended attribute + * @value: value of the new xattr. If %NULL, will remove the attribute + * @size: size of the new xattr + * @flags: %XATTR_{CREATE|REPLACE} + * + * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails + * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist; + * otherwise, fails with -ENODATA. + * + * Returns 0 on success, -errno on failure. */ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags) @@ -950,6 +961,9 @@ ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer, return used; } +/* + * Adds an extended attribute to the list + */ void simple_xattr_list_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr) { -- cgit v1.2.3 From b9d6cfdeaf67cc34cdfd53ab234358dd2910a0f4 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Wed, 12 Sep 2012 10:31:13 -0400 Subject: xattr: mark variable as uninitialized to make both gcc and smatch happy new_xattr in __simple_xattr_set() is only initialized with a valid pointer if value is not NULL, which only happens if this function is called directly with the intention to remove an existing extended attribute. Even being safe to be this way, smatch warns about possible NULL dereference. Dan Carpenter suggested using uninitialized_var() which will make both gcc and smatch happy. Cc: Fengguang Wu Cc: Dan Carpenter Signed-off-by: Aristeu Rozanski Signed-off-by: Tejun Heo --- fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index f053c1135d0f..014f11321fd9 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -845,7 +845,7 @@ static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags) { struct simple_xattr *xattr; - struct simple_xattr *new_xattr = NULL; + struct simple_xattr *uninitialized_var(new_xattr); int err = 0; /* value == NULL means remove */ -- cgit v1.2.3 From 7149f2558d5b5b988726662fe58b1c388337805b Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 12 Sep 2012 18:02:46 -0700 Subject: eCryptfs: Write out all dirty pages just before releasing the lower file Fixes a regression caused by: 821f749 eCryptfs: Revert to a writethrough cache model That patch reverted some code (specifically, 32001d6f) that was necessary to properly handle open() -> mmap() -> close() -> dirty pages -> munmap(), because the lower file could be closed before the dirty pages are written out. Rather than reapplying 32001d6f, this approach is a better way of ensuring that the lower file is still open in order to handle writing out the dirty pages. It is called from ecryptfs_release(), while we have a lock on the lower file pointer, just before the lower file gets the final fput() and we overwrite the pointer. https://launchpad.net/bugs/1047261 Signed-off-by: Tyler Hicks Reported-by: Artemy Tregubenko Tested-by: Artemy Tregubenko Tested-by: Colin Ian King --- fs/ecryptfs/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 2768138eefee..9b627c15010a 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode) inode_info = ecryptfs_inode_to_private(inode); if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, &inode_info->lower_file_mutex)) { + filemap_write_and_wait(inode->i_mapping); fput(inode_info->lower_file); inode_info->lower_file = NULL; mutex_unlock(&inode_info->lower_file_mutex); -- cgit v1.2.3 From 0753f70f07fbbd23a48d61ffea37028bd0bd6c7d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 19 Mar 2012 15:13:51 +0000 Subject: fs: Build sys_stat64() and friends if __ARCH_WANT_COMPAT_STAT64 On AArch64 Linux, we want the sys_stat64() and related functions for compat support but do not need the generic struct stat64, enabled automatically if __ARCH_WANT_STAT64. Signed-off-by: Catalin Marinas Acked-by: Arnd Bergmann --- fs/stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/stat.c b/fs/stat.c index b6ff11825fc8..6126c5da22e7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -326,7 +326,7 @@ SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, /* ---------- LFS-64 ----------- */ -#ifdef __ARCH_WANT_STAT64 +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) #ifndef INIT_STRUCT_STAT64_PADDING # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) @@ -415,7 +415,7 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, return error; return cp_new_stat64(&stat, statbuf); } -#endif /* __ARCH_WANT_STAT64 */ +#endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) -- cgit v1.2.3 From 64e6651dcc10e9d2cc6230208a8e6c2cfd19ae18 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 12 Sep 2012 18:38:00 -0700 Subject: eCryptfs: Call lower ->flush() from ecryptfs_flush() Since eCryptfs only calls fput() on the lower file in ecryptfs_release(), eCryptfs should call the lower filesystem's ->flush() from ecryptfs_flush(). If the lower filesystem implements ->flush(), then eCryptfs should try to flush out any dirty pages prior to calling the lower ->flush(). If the lower filesystem does not implement ->flush(), then eCryptfs has no need to do anything in ecryptfs_flush() since dirty pages are now written out to the lower filesystem in ecryptfs_release(). Signed-off-by: Tyler Hicks --- fs/ecryptfs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 44ce5c6a541d..d45ba4568128 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -275,8 +275,14 @@ out: static int ecryptfs_flush(struct file *file, fl_owner_t td) { - return file->f_mode & FMODE_WRITE - ? filemap_write_and_wait(file->f_mapping) : 0; + struct file *lower_file = ecryptfs_file_to_lower(file); + + if (lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); + return lower_file->f_op->flush(lower_file, td); + } + + return 0; } static int ecryptfs_release(struct inode *inode, struct file *file) -- cgit v1.2.3 From 8335eafc2859e1a26282bef7c3d19f3d68868b8a Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 13 Sep 2012 12:00:56 -0700 Subject: eCryptfs: Copy up attributes of the lower target inode after rename After calling into the lower filesystem to do a rename, the lower target inode's attributes were not copied up to the eCryptfs target inode. This resulted in the eCryptfs target inode staying around, rather than being evicted, because i_nlink was not updated for the eCryptfs inode. This also meant that eCryptfs didn't do the final iput() on the lower target inode so it stayed around, as well. This would result in a failure to free up space occupied by the target file in the rename() operation. Both target inodes would eventually be evicted when the eCryptfs filesystem was unmounted. This patch calls fsstack_copy_attr_all() after the lower filesystem does its ->rename() so that important inode attributes, such as i_nlink, are updated at the eCryptfs layer. ecryptfs_evict_inode() is now called and eCryptfs can drop its final reference on the lower inode. http://launchpad.net/bugs/561129 Signed-off-by: Tyler Hicks Tested-by: Colin Ian King Cc: [2.6.39+] --- fs/ecryptfs/inode.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 534b129ea676..cc7709e7c508 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -619,6 +619,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_old_dir_dentry; struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; + struct inode *target_inode; lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); @@ -626,6 +627,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, dget(lower_new_dentry); lower_old_dir_dentry = dget_parent(lower_old_dentry); lower_new_dir_dentry = dget_parent(lower_new_dentry); + target_inode = new_dentry->d_inode; trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); /* source should not be ancestor of target */ if (trap == lower_old_dentry) { @@ -641,6 +643,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, lower_new_dir_dentry->d_inode, lower_new_dentry); if (rc) goto out_lock; + if (target_inode) + fsstack_copy_attr_all(target_inode, + ecryptfs_inode_to_lower(target_inode)); fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); if (new_dir != old_dir) fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); -- cgit v1.2.3 From 55815f70147dcfa3ead5738fd56d3574e2e3c1c2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 14 Sep 2012 14:48:21 -0700 Subject: vfs: make O_PATH file descriptors usable for 'fstat()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already use them for openat() and friends, but fstat() also wants to be able to use O_PATH file descriptors. This should make it more directly comparable to the O_SEARCH of Solaris. Note that you could already do the same thing with "fstatat()" and an empty path, but just doing "fstat()" directly is simpler and faster, so there is no reason not to just allow it directly. See also commit 332a2e1244bd, which did the same thing for fchdir, for the same reasons. Reported-by: ольга крыжановская Cc: Al Viro Cc: stable@kernel.org # O_PATH introduced in 3.0+ Signed-off-by: Linus Torvalds --- fs/stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/stat.c b/fs/stat.c index b6ff11825fc8..40780229a032 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) { int fput_needed; - struct file *f = fget_light(fd, &fput_needed); + struct file *f = fget_raw_light(fd, &fput_needed); int error = -EBADF; if (f) { -- cgit v1.2.3 From f3a87f1b0c4086a408eda48e4c26e32ff80d3124 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Sep 2012 20:06:30 -0400 Subject: Revert "Btrfs: fix some error codes in btrfs_qgroup_inherit()" This reverts commit 5986802c2fcc754040bb7ed95f30bb16c4a843b7. Both paths are not error paths but regular cases where non-qgroup subvols are involved. Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 38b42e7bc91d..b65015581744 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1371,10 +1371,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); - if (!srcgroup) { - ret = -EINVAL; + if (!srcgroup) goto unlock; - } dstgroup->rfer = srcgroup->rfer - level_size; dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; srcgroup->excl = level_size; @@ -1383,10 +1381,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, qgroup_dirty(fs_info, srcgroup); } - if (!inherit) { - ret = -EINVAL; + if (!inherit) goto unlock; - } i_qgroups = (u64 *)(inherit + 1); for (i = 0; i < inherit->num_qgroups; ++i) { -- cgit v1.2.3 From b40c2e665cd552eae5fbdbb878bc29a34357668e Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Mon, 17 Sep 2012 11:58:19 -0500 Subject: fs/jfs: TRIM support for JFS Filesystem This patch adds support for the two linux interfaces of the discard/TRIM command for SSD devices and sparse/thinly-provisioned LUNs. JFS will support batched discard via FITRIM ioctl and online discard with the discard mount option. Signed-off-by: Tino Reichardt Signed-off-by: Dave Kleikamp --- Documentation/filesystems/jfs.txt | 19 +++++- fs/jfs/Makefile | 2 +- fs/jfs/ioctl.c | 43 ++++++++++++- fs/jfs/jfs_discard.c | 117 +++++++++++++++++++++++++++++++++++ fs/jfs/jfs_discard.h | 26 ++++++++ fs/jfs/jfs_dmap.c | 125 ++++++++++++++++++++++++++++++++++++-- fs/jfs/jfs_dmap.h | 2 + fs/jfs/jfs_filsys.h | 3 + fs/jfs/jfs_incore.h | 1 + fs/jfs/super.c | 71 +++++++++++++++++----- 10 files changed, 385 insertions(+), 24 deletions(-) create mode 100644 fs/jfs/jfs_discard.c create mode 100644 fs/jfs/jfs_discard.h (limited to 'fs') diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt index 26ebde77e821..2f94f9ca1794 100644 --- a/Documentation/filesystems/jfs.txt +++ b/Documentation/filesystems/jfs.txt @@ -3,6 +3,7 @@ IBM's Journaled File System (JFS) for Linux JFS Homepage: http://jfs.sourceforge.net/ The following mount options are supported: +(*) == default iocharset=name Character set to use for converting from Unicode to ASCII. The default is to do no conversion. Use @@ -21,12 +22,12 @@ nointegrity Do not write to the journal. The primary use of this option from backup media. The integrity of the volume is not guaranteed if the system abnormally abends. -integrity Default. Commit metadata changes to the journal. Use this - option to remount a volume where the nointegrity option was +integrity(*) Commit metadata changes to the journal. Use this option to + remount a volume where the nointegrity option was previously specified in order to restore normal behavior. errors=continue Keep going on a filesystem error. -errors=remount-ro Default. Remount the filesystem read-only on an error. +errors=remount-ro(*) Remount the filesystem read-only on an error. errors=panic Panic and halt the machine if an error occurs. uid=value Override on-disk uid with specified value @@ -35,6 +36,18 @@ umask=value Override on-disk umask with specified octal value. For directories, the execute bit will be set if the corresponding read bit is set. +discard=minlen This enables/disables the use of discard/TRIM commands. +discard The discard/TRIM commands are sent to the underlying +nodiscard(*) block device when blocks are freed. This is useful for SSD + devices and sparse/thinly-provisioned LUNs. The FITRIM ioctl + command is also available together with the nodiscard option. + The value of minlen specifies the minimum blockcount, when + a TRIM command to the block device is considered usefull. + When no value is given to the discard option, it defaults to + 64 blocks, which means 256KiB in JFS. + The minlen value of discard overrides the minlen value given + on an FITRIM ioctl(). + Please send bugs, comments, cards and letters to shaggy@linux.vnet.ibm.com. The JFS mailing list can be subscribed to by using the link labeled diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile index a58fa72d7e59..d20d4737b3ef 100644 --- a/fs/jfs/Makefile +++ b/fs/jfs/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_JFS_FS) += jfs.o jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ - jfs_unicode.o jfs_dtree.o jfs_inode.o \ + jfs_unicode.o jfs_dtree.o jfs_inode.o jfs_discard.o \ jfs_extent.o symlink.o jfs_metapage.o \ jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ resize.o xattr.o ioctl.o diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index f19d1e04a374..bc555ff417e9 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -11,13 +11,17 @@ #include #include #include +#include #include #include +#include "jfs_filsys.h" +#include "jfs_debug.h" #include "jfs_incore.h" #include "jfs_dinode.h" #include "jfs_inode.h" - +#include "jfs_dmap.h" +#include "jfs_discard.h" static struct { long jfs_flag; @@ -123,6 +127,40 @@ setflags_out: mnt_drop_write_file(filp); return err; } + + case FITRIM: + { + struct super_block *sb = inode->i_sb; + struct request_queue *q = bdev_get_queue(sb->s_bdev); + struct fstrim_range range; + s64 ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) { + jfs_warn("FITRIM not supported on device"); + return -EOPNOTSUPP; + } + + if (copy_from_user(&range, (struct fstrim_range __user *)arg, + sizeof(range))) + return -EFAULT; + + range.minlen = max_t(unsigned int, range.minlen, + q->limits.discard_granularity); + + ret = jfs_ioc_trim(inode, &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range __user *)arg, &range, + sizeof(range))) + return -EFAULT; + + return 0; + } + default: return -ENOTTY; } @@ -142,6 +180,9 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case JFS_IOC_SETFLAGS32: cmd = JFS_IOC_SETFLAGS; break; + case FITRIM: + cmd = FITRIM; + break; } return jfs_ioctl(filp, cmd, arg); } diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c new file mode 100644 index 000000000000..9947563e4175 --- /dev/null +++ b/fs/jfs/jfs_discard.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) Tino Reichardt, 2012 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +#include "jfs_incore.h" +#include "jfs_superblock.h" +#include "jfs_discard.h" +#include "jfs_dmap.h" +#include "jfs_debug.h" + + +/* + * NAME: jfs_issue_discard() + * + * FUNCTION: TRIM the specified block range on device, if supported + * + * PARAMETERS: + * ip - pointer to in-core inode + * blkno - starting block number to be trimmed (0..N) + * nblocks - number of blocks to be trimmed + * + * RETURN VALUES: + * none + * + * serialization: IREAD_LOCK(ipbmap) held on entry/exit; + */ +void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks) +{ + struct super_block *sb = ip->i_sb; + int r = 0; + + r = sb_issue_discard(sb, blkno, nblocks, GFP_NOFS, 0); + if (unlikely(r != 0)) { + jfs_err("JFS: sb_issue_discard" \ + "(%p, %llu, %llu, GFP_NOFS, 0) = %d => failed!\n", + sb, (unsigned long long)blkno, + (unsigned long long)nblocks, r); + } + + jfs_info("JFS: sb_issue_discard" \ + "(%p, %llu, %llu, GFP_NOFS, 0) = %d\n", + sb, (unsigned long long)blkno, + (unsigned long long)nblocks, r); + + return; +} + +/* + * NAME: jfs_ioc_trim() + * + * FUNCTION: attempt to discard (TRIM) all free blocks from the + * filesystem. + * + * PARAMETERS: + * ip - pointer to in-core inode; + * range - the range, given by user space + * + * RETURN VALUES: + * 0 - success + * -EIO - i/o error + */ +int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range) +{ + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + struct super_block *sb = ipbmap->i_sb; + int agno, agno_end; + s64 start, end, minlen; + u64 trimmed = 0; + + /** + * convert byte values to block size of filesystem: + * start: First Byte to trim + * len: number of Bytes to trim from start + * minlen: minimum extent length in Bytes + */ + start = range->start >> sb->s_blocksize_bits; + if (start < 0) + start = 0; + end = start + (range->len >> sb->s_blocksize_bits) - 1; + if (end >= bmp->db_mapsize) + end = bmp->db_mapsize - 1; + minlen = range->minlen >> sb->s_blocksize_bits; + if (minlen <= 0) + minlen = 1; + + /** + * we trim all ag's within the range + */ + agno = BLKTOAG(start, JFS_SBI(ip->i_sb)); + agno_end = BLKTOAG(end, JFS_SBI(ip->i_sb)); + while (agno <= agno_end) { + trimmed += dbDiscardAG(ip, agno, minlen); + agno++; + } + range->len = trimmed << sb->s_blocksize_bits; + + return 0; +} diff --git a/fs/jfs/jfs_discard.h b/fs/jfs/jfs_discard.h new file mode 100644 index 000000000000..40d1ee6081a0 --- /dev/null +++ b/fs/jfs/jfs_discard.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) Tino Reichardt, 2012 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_DISCARD +#define _H_JFS_DISCARD + +struct fstrim_range; + +extern void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks); +extern int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range); + +#endif /* _H_JFS_DISCARD */ diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 9cbd11a3f804..174feb6a73c1 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1,5 +1,6 @@ /* * Copyright (C) International Business Machines Corp., 2000-2004 + * Portions Copyright (C) Tino Reichardt, 2012 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +26,7 @@ #include "jfs_lock.h" #include "jfs_metapage.h" #include "jfs_debug.h" +#include "jfs_discard.h" /* * SERIALIZATION of the Block Allocation Map. @@ -104,7 +106,6 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks); static int dbMaxBud(u8 * cp); -s64 dbMapFileSizeToMapSize(struct inode *ipbmap); static int blkstol2(s64 nb); static int cntlz(u32 value); @@ -145,7 +146,6 @@ static const s8 budtab[256] = { 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 }; - /* * NAME: dbMount() * @@ -310,7 +310,6 @@ int dbSync(struct inode *ipbmap) return (0); } - /* * NAME: dbFree() * @@ -337,6 +336,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) s64 lblkno, rem; struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + struct super_block *sb = ipbmap->i_sb; IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); @@ -351,6 +351,13 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) return -EIO; } + /** + * TRIM the blocks, when mounted with discard option + */ + if (JFS_SBI(sb)->flag & JFS_DISCARD) + if (JFS_SBI(sb)->minblks_trim <= nblocks) + jfs_issue_discard(ipbmap, blkno, nblocks); + /* * free the blocks a dmap at a time. */ @@ -1095,7 +1102,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) /* we were not successful */ release_metapage(mp); - return (rc); } @@ -1589,6 +1595,117 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) } +/* + * NAME: dbDiscardAG() + * + * FUNCTION: attempt to discard (TRIM) all free blocks of specific AG + * + * algorithm: + * 1) allocate blocks, as large as possible and save them + * while holding IWRITE_LOCK on ipbmap + * 2) trim all these saved block/length values + * 3) mark the blocks free again + * + * benefit: + * - we work only on one ag at some time, minimizing how long we + * need to lock ipbmap + * - reading / writing the fs is possible most time, even on + * trimming + * + * downside: + * - we write two times to the dmapctl and dmap pages + * - but for me, this seems the best way, better ideas? + * /TR 2012 + * + * PARAMETERS: + * ip - pointer to in-core inode + * agno - ag to trim + * minlen - minimum value of contiguous blocks + * + * RETURN VALUES: + * s64 - actual number of blocks trimmed + */ +s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) +{ + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + s64 nblocks, blkno; + u64 trimmed = 0; + int rc, l2nb; + struct super_block *sb = ipbmap->i_sb; + + struct range2trim { + u64 blkno; + u64 nblocks; + } *totrim, *tt; + + /* max blkno / nblocks pairs to trim */ + int count = 0, range_cnt; + + /* prevent others from writing new stuff here, while trimming */ + IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); + + nblocks = bmp->db_agfree[agno]; + range_cnt = nblocks; + do_div(range_cnt, (int)minlen); + range_cnt = min(range_cnt + 1, 32 * 1024); + totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); + if (totrim == NULL) { + jfs_error(bmp->db_ipbmap->i_sb, + "dbDiscardAG: no memory for trim array"); + IWRITE_UNLOCK(ipbmap); + return 0; + } + + tt = totrim; + while (nblocks >= minlen) { + l2nb = BLKSTOL2(nblocks); + + /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */ + rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno); + if (rc == 0) { + tt->blkno = blkno; + tt->nblocks = nblocks; + tt++; count++; + + /* the whole ag is free, trim now */ + if (bmp->db_agfree[agno] == 0) + break; + + /* give a hint for the next while */ + nblocks = bmp->db_agfree[agno]; + continue; + } else if (rc == -ENOSPC) { + /* search for next smaller log2 block */ + l2nb = BLKSTOL2(nblocks) - 1; + nblocks = 1 << l2nb; + } else { + /* Trim any already allocated blocks */ + jfs_error(bmp->db_ipbmap->i_sb, + "dbDiscardAG: -EIO"); + break; + } + + /* check, if our trim array is full */ + if (unlikely(count >= range_cnt - 1)) + break; + } + IWRITE_UNLOCK(ipbmap); + + tt->nblocks = 0; /* mark the current end */ + for (tt = totrim; tt->nblocks != 0; tt++) { + /* when mounted with online discard, dbFree() will + * call jfs_issue_discard() itself */ + if (!(JFS_SBI(sb)->flag & JFS_DISCARD)) + jfs_issue_discard(ip, tt->blkno, tt->nblocks); + dbFree(ip, tt->blkno, tt->nblocks); + trimmed += tt->nblocks; + } + kfree(totrim); + + return trimmed; +} + /* * NAME: dbFindCtl() * diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 6dcb906c55d8..562b9a7e4311 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -311,4 +311,6 @@ extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks); extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); extern void dbFinalizeBmap(struct inode *ipbmap); extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); +extern s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen); + #endif /* _H_JFS_DMAP */ diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index b3f5463fbe52..b67d64671bb4 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -45,6 +45,9 @@ /* mount time flag to disable journaling to disk */ #define JFS_NOINTEGRITY 0x00000040 +/* mount time flag to enable TRIM to ssd disks */ +#define JFS_DISCARD 0x00000080 + /* commit option */ #define JFS_COMMIT 0x00000f00 /* commit option mask */ #define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 584a4a1a6e81..4fa958ae1986 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -195,6 +195,7 @@ struct jfs_sb_info { uint uid; /* uid to override on-disk uid */ uint gid; /* gid to override on-disk gid */ uint umask; /* umask to override on-disk umask */ + uint minblks_trim; /* minimum blocks, for online trim */ }; /* jfs_sb_info commit_state */ diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..6f4ac1c070f0 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -100,7 +101,7 @@ void jfs_error(struct super_block *sb, const char * function, ...) vsnprintf(error_buf, sizeof(error_buf), function, args); va_end(args); - printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); + pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf); jfs_handle_error(sb); } @@ -197,7 +198,8 @@ static void jfs_put_super(struct super_block *sb) enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, + Opt_discard, Opt_nodiscard, Opt_discard_minblk }; static const match_table_t tokens = { @@ -214,6 +216,9 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_umask, "umask=%u"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, + {Opt_discard_minblk, "discard=%u"}, {Opt_err, NULL} }; @@ -255,8 +260,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, else { nls_map = load_nls(args[0].from); if (!nls_map) { - printk(KERN_ERR - "JFS: charset not found\n"); + pr_err("JFS: charset not found\n"); goto cleanup; } } @@ -272,8 +276,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, *newLVSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (*newLVSize == 0) - printk(KERN_ERR - "JFS: Cannot determine volume size\n"); + pr_err("JFS: Cannot determine volume size\n"); break; } case Opt_errors: @@ -294,8 +297,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, *flag &= ~JFS_ERR_REMOUNT_RO; *flag |= JFS_ERR_PANIC; } else { - printk(KERN_ERR - "JFS: %s is an invalid error handler\n", + pr_err("JFS: %s is an invalid error handler\n", errors); goto cleanup; } @@ -314,8 +316,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_usrquota: case Opt_grpquota: case Opt_quota: - printk(KERN_ERR - "JFS: quota operations not supported\n"); + pr_err("JFS: quota operations not supported\n"); break; #endif case Opt_uid: @@ -324,23 +325,61 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, sbi->uid = simple_strtoul(uid, &uid, 0); break; } + case Opt_gid: { char *gid = args[0].from; sbi->gid = simple_strtoul(gid, &gid, 0); break; } + case Opt_umask: { char *umask = args[0].from; sbi->umask = simple_strtoul(umask, &umask, 8); if (sbi->umask & ~0777) { - printk(KERN_ERR - "JFS: Invalid value of umask\n"); + pr_err("JFS: Invalid value of umask\n"); goto cleanup; } break; } + + case Opt_discard: + { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + /* if set to 1, even copying files will cause + * trimming :O + * -> user has more control over the online trimming + */ + sbi->minblks_trim = 64; + if (blk_queue_discard(q)) { + *flag |= JFS_DISCARD; + } else { + pr_err("JFS: discard option " \ + "not supported on device\n"); + } + break; + } + + case Opt_nodiscard: + *flag &= ~JFS_DISCARD; + break; + + case Opt_discard_minblk: + { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + char *minblks_trim = args[0].from; + if (blk_queue_discard(q)) { + *flag |= JFS_DISCARD; + sbi->minblks_trim = simple_strtoull( + minblks_trim, &minblks_trim, 0); + } else { + pr_err("JFS: discard option " \ + "not supported on device\n"); + } + break; + } + default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -374,8 +413,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) if (newLVSize) { if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR - "JFS: resize requires volume to be mounted read-write\n"); + pr_err("JFS: resize requires volume" \ + " to be mounted read-write\n"); return -EROFS; } rc = jfs_extendfs(sb, newLVSize, 0); @@ -457,7 +496,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) #endif if (newLVSize) { - printk(KERN_ERR "resize option for remount only\n"); + pr_err("resize option for remount only\n"); goto out_kfree; } @@ -625,6 +664,8 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",umask=%03o", sbi->umask); if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); + if (sbi->flag & JFS_DISCARD) + seq_printf(seq, ",discard=%u", sbi->minblks_trim); if (sbi->nls_tab) seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); if (sbi->flag & JFS_ERR_CONTINUE) -- cgit v1.2.3 From 550d6da288df57f154ca27c4acb1c398ced42ea9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 6 Sep 2012 15:33:09 +0800 Subject: JFS: use list_move instead of list_del/list_add Using list_move() instead of list_del() + list_add(). spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_txnmgr.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index bb8b661bcc50..5fcc02eaa64c 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2977,12 +2977,9 @@ int jfs_sync(void *arg) * put back on the anon_list. */ - /* Take off anon_list */ - list_del(&jfs_ip->anon_inode_list); - - /* Put on anon_list2 */ - list_add(&jfs_ip->anon_inode_list, - &TxAnchor.anon_list2); + /* Move from anon_list to anon_list2 */ + list_move(&jfs_ip->anon_inode_list, + &TxAnchor.anon_list2); TXN_UNLOCK(); iput(ip); -- cgit v1.2.3 From 4026c9fde9c67266932afd209e25bfef4474a1be Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Thu, 13 Sep 2012 16:18:47 -0500 Subject: xfs: stop the sync worker before xfs_unmountfs Cancel work of the xfs_sync_worker before teardown of the log in xfs_unmountfs. This prevents occasional crashes on unmount like so: PID: 21602 TASK: ee9df060 CPU: 0 COMMAND: "kworker/0:3" #0 [c5377d28] crash_kexec at c0292c94 #1 [c5377d80] oops_end at c07090c2 #2 [c5377d98] no_context at c06f614e #3 [c5377dbc] __bad_area_nosemaphore at c06f6281 #4 [c5377df4] bad_area_nosemaphore at c06f629b #5 [c5377e00] do_page_fault at c070b0cb #6 [c5377e7c] error_code (via page_fault) at c070892c EAX: f300c6a8 EBX: f300c6a8 ECX: 000000c0 EDX: 000000c0 EBP: c5377ed0 DS: 007b ESI: 00000000 ES: 007b EDI: 00000001 GS: ffffad20 CS: 0060 EIP: c0481ad0 ERR: ffffffff EFLAGS: 00010246 #7 [c5377eb0] atomic64_read_cx8 at c0481ad0 #8 [c5377ebc] xlog_assign_tail_lsn_locked at f7cc7c6e [xfs] #9 [c5377ed4] xfs_trans_ail_delete_bulk at f7ccd520 [xfs] #10 [c5377f0c] xfs_buf_iodone at f7ccb602 [xfs] #11 [c5377f24] xfs_buf_do_callbacks at f7cca524 [xfs] #12 [c5377f30] xfs_buf_iodone_callbacks at f7cca5da [xfs] #13 [c5377f4c] xfs_buf_iodone_work at f7c718d0 [xfs] #14 [c5377f58] process_one_work at c024ee4c #15 [c5377f98] worker_thread at c024f43d #16 [c5377fbc] kthread at c025326b #17 [c5377fe8] kernel_thread_helper at c070e834 PID: 26653 TASK: e79143b0 CPU: 3 COMMAND: "umount" #0 [cde0fda0] __schedule at c0706595 #1 [cde0fe28] schedule at c0706b89 #2 [cde0fe30] schedule_timeout at c0705600 #3 [cde0fe94] __down_common at c0706098 #4 [cde0fec8] __down at c0706122 #5 [cde0fed0] down at c025936f #6 [cde0fee0] xfs_buf_lock at f7c7131d [xfs] #7 [cde0ff00] xfs_freesb at f7cc2236 [xfs] #8 [cde0ff10] xfs_fs_put_super at f7c80f21 [xfs] #9 [cde0ff1c] generic_shutdown_super at c0333d7a #10 [cde0ff38] kill_block_super at c0333e0f #11 [cde0ff48] deactivate_locked_super at c0334218 #12 [cde0ff58] deactivate_super at c033495d #13 [cde0ff68] mntput_no_expire at c034bc13 #14 [cde0ff7c] sys_umount at c034cc69 #15 [cde0ffa0] sys_oldumount at c034ccd4 #16 [cde0ffb0] system_call at c0707e66 commit 11159a05 added this to xfs_log_unmount and needs to be cleaned up at a later date. Signed-off-by: Ben Myers Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely --- fs/xfs/xfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bdaf4cb9f4a2..19e2380fb867 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -919,6 +919,7 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); xfs_filestream_unmount(mp); + cancel_delayed_work_sync(&mp->m_sync_work); xfs_unmountfs(mp); xfs_syncd_stop(mp); xfs_freesb(mp); -- cgit v1.2.3 From 6bf6104573482570f7103d3e5ddf9574db43a363 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Thu, 13 Sep 2012 15:03:37 -0700 Subject: fs/proc: fix potential unregister_sysctl_table hang The unregister_sysctl_table() function hangs if all references to its ctl_table_header structure are not dropped. This can happen sometimes because of a leak in proc_sys_lookup(): proc_sys_lookup() gets a reference to the table via lookup_entry(), but it does not release it when a subsequent call to sysctl_follow_link() fails. This patch fixes this leak by making sure the reference is always dropped on return. See also commit 076c3eed2c31 ("sysctl: Rewrite proc_sys_lookup introducing find_entry and lookup_entry") which reorganized this code in 3.4. Tested in Linux 3.4.4. Signed-off-by: Francesco Ruggeri Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index dfafeb2b05a0..eb7cc91b7258 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -462,9 +462,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); - if (h) - sysctl_head_finish(h); - if (!inode) goto out; @@ -473,6 +470,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, d_add(dentry, inode); out: + if (h) + sysctl_head_finish(h); sysctl_head_finish(head); return err; } -- cgit v1.2.3 From e1760bd5ffae8cb98cffb030ee8e631eba28f3d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 22:39:43 -0700 Subject: userns: Convert the audit loginuid to be a kuid Always store audit loginuids in type kuid_t. Print loginuids by converting them into uids in the appropriate user namespace, and then printing the resulting uid. Modify audit_get_loginuid to return a kuid_t. Modify audit_set_loginuid to take a kuid_t. Modify /proc//loginuid on read to convert the loginuid into the user namespace of the opener of the file. Modify /proc//loginud on write to convert the loginuid rom the user namespace of the opener of the file. Cc: Al Viro Cc: Eric Paris Cc: Paul Moore ? Cc: David Miller Signed-off-by: Eric W. Biederman --- drivers/tty/tty_audit.c | 14 ++++++++------ fs/proc/base.c | 12 ++++++++++-- include/linux/audit.h | 6 +++--- include/linux/init_task.h | 2 +- include/linux/sched.h | 2 +- include/linux/tty.h | 4 ++-- include/net/netlabel.h | 2 +- include/net/xfrm.h | 23 ++++++++++++----------- kernel/audit.c | 20 ++++++++++---------- kernel/audit_watch.c | 2 +- kernel/auditfilter.c | 7 ++++--- kernel/auditsc.c | 20 +++++++++++--------- net/core/dev.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlabel/netlabel_user.c | 2 +- net/xfrm/xfrm_policy.c | 8 ++++---- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 12 ++++++------ 18 files changed, 80 insertions(+), 66 deletions(-) (limited to 'fs') diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 7c5866920622..5b59bd7f4227 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -61,7 +61,7 @@ static void tty_audit_buf_put(struct tty_audit_buf *buf) } static void tty_audit_log(const char *description, struct task_struct *tsk, - uid_t loginuid, unsigned sessionid, int major, + kuid_t loginuid, unsigned sessionid, int major, int minor, unsigned char *data, size_t size) { struct audit_buffer *ab; @@ -73,7 +73,9 @@ static void tty_audit_log(const char *description, struct task_struct *tsk, audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u " "major=%d minor=%d comm=", description, - tsk->pid, uid, loginuid, sessionid, + tsk->pid, uid, + from_kuid(&init_user_ns, loginuid), + sessionid, major, minor); get_task_comm(name, tsk); audit_log_untrustedstring(ab, name); @@ -89,7 +91,7 @@ static void tty_audit_log(const char *description, struct task_struct *tsk, * Generate an audit message from the contents of @buf, which is owned by * @tsk with @loginuid. @buf->mutex must be locked. */ -static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, +static void tty_audit_buf_push(struct task_struct *tsk, kuid_t loginuid, unsigned int sessionid, struct tty_audit_buf *buf) { @@ -112,7 +114,7 @@ static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, */ static void tty_audit_buf_push_current(struct tty_audit_buf *buf) { - uid_t auid = audit_get_loginuid(current); + kuid_t auid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); tty_audit_buf_push(current, auid, sessionid, buf); } @@ -179,7 +181,7 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) } if (should_audit && audit_enabled) { - uid_t auid; + kuid_t auid; unsigned int sessionid; auid = audit_get_loginuid(current); @@ -199,7 +201,7 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) * reference to the tty audit buffer if available. * Flush the buffer or return an appropriate error code. */ -int tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) +int tty_audit_push_task(struct task_struct *tsk, kuid_t loginuid, u32 sessionid) { struct tty_audit_buf *buf = ERR_PTR(-EPERM); unsigned long flags; diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b6c84cbdb73..138cff4b05dd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1089,7 +1089,8 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", - audit_get_loginuid(task)); + from_kuid(file->f_cred->user_ns, + audit_get_loginuid(task))); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -1101,6 +1102,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, char *page, *tmp; ssize_t length; uid_t loginuid; + kuid_t kloginuid; rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { @@ -1130,7 +1132,13 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - length = audit_set_loginuid(loginuid); + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } + + length = audit_set_loginuid(kloginuid); if (likely(length == 0)) length = count; diff --git a/include/linux/audit.h b/include/linux/audit.h index ca019bb74da3..12367cbadfe1 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -527,7 +527,7 @@ static inline void audit_ptrace(struct task_struct *t) extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); -extern int audit_set_loginuid(uid_t loginuid); +extern int audit_set_loginuid(kuid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) #define audit_get_sessionid(t) ((t)->sessionid) extern void audit_log_task_context(struct audit_buffer *ab); @@ -639,7 +639,7 @@ extern int audit_signals; #define audit_core_dumps(i) do { ; } while (0) #define audit_seccomp(i,s,c) do { ; } while (0) #define auditsc_get_stamp(c,t,s) (0) -#define audit_get_loginuid(t) (-1) +#define audit_get_loginuid(t) (INVALID_UID) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) #define audit_ipc_obj(i) ((void)0) @@ -705,7 +705,7 @@ extern int audit_update_lsm_rules(void); extern int audit_filter_user(void); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int seq, - void *data, size_t datasz, uid_t loginuid, + void *data, size_t datasz, kuid_t loginuid, u32 sessionid, u32 sid); extern int audit_enabled; #else diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 89f1cb1056f0..6d087c5f57f7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -92,7 +92,7 @@ extern struct group_info init_groups; #ifdef CONFIG_AUDITSYSCALL #define INIT_IDS \ - .loginuid = -1, \ + .loginuid = INVALID_UID, \ .sessionid = -1, #else #define INIT_IDS diff --git a/include/linux/sched.h b/include/linux/sched.h index c147e7024f11..f64d092f2bed 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1426,7 +1426,7 @@ struct task_struct { struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - uid_t loginuid; + kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; diff --git a/include/linux/tty.h b/include/linux/tty.h index 9f47ab540f65..7298385815e6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -553,7 +553,7 @@ extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid); + kuid_t loginuid, u32 sessionid); #else static inline void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size) @@ -572,7 +572,7 @@ static inline void tty_audit_push(struct tty_struct *tty) { } static inline int tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid) + kuid_t loginuid, u32 sessionid) { return 0; } diff --git a/include/net/netlabel.h b/include/net/netlabel.h index f67440970d7e..2c95d55f7914 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -110,7 +110,7 @@ struct cipso_v4_doi; /* NetLabel audit information */ struct netlbl_audit { u32 secid; - uid_t loginuid; + kuid_t loginuid; u32 sessionid; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9509eb29b80..1f217e2c5d82 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -662,7 +662,7 @@ struct xfrm_spi_skb_cb { /* Audit Information */ struct xfrm_audit { u32 secid; - uid_t loginuid; + kuid_t loginuid; u32 sessionid; }; @@ -681,13 +681,14 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op) return audit_buf; } -static inline void xfrm_audit_helper_usrinfo(uid_t auid, u32 ses, u32 secid, +static inline void xfrm_audit_helper_usrinfo(kuid_t auid, u32 ses, u32 secid, struct audit_buffer *audit_buf) { char *secctx; u32 secctx_len; - audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses); + audit_log_format(audit_buf, " auid=%u ses=%u", + from_kuid(&init_user_ns, auid), ses); if (secid != 0 && security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " subj=%s", secctx); @@ -697,13 +698,13 @@ static inline void xfrm_audit_helper_usrinfo(uid_t auid, u32 ses, u32 secid, } extern void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_state_add(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_state_delete(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb); extern void xfrm_audit_state_replay(struct xfrm_state *x, @@ -716,22 +717,22 @@ extern void xfrm_audit_state_icvfail(struct xfrm_state *x, #else static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } static inline void xfrm_audit_state_add(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } diff --git a/kernel/audit.c b/kernel/audit.c index 2e0dd5edf69b..44a4b13c9f00 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -265,7 +265,7 @@ void audit_log_lost(const char *message) } static int audit_log_config_change(char *function_name, int new, int old, - uid_t loginuid, u32 sessionid, u32 sid, + kuid_t loginuid, u32 sessionid, u32 sid, int allow_changes) { struct audit_buffer *ab; @@ -273,7 +273,7 @@ static int audit_log_config_change(char *function_name, int new, int old, ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new, - old, loginuid, sessionid); + old, from_kuid(&init_user_ns, loginuid), sessionid); if (sid) { char *ctx = NULL; u32 len; @@ -293,7 +293,7 @@ static int audit_log_config_change(char *function_name, int new, int old, } static int audit_do_config_change(char *function_name, int *to_change, - int new, uid_t loginuid, u32 sessionid, + int new, kuid_t loginuid, u32 sessionid, u32 sid) { int allow_changes, rc = 0, old = *to_change; @@ -320,21 +320,21 @@ static int audit_do_config_change(char *function_name, int *to_change, return rc; } -static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sessionid, +static int audit_set_rate_limit(int limit, kuid_t loginuid, u32 sessionid, u32 sid) { return audit_do_config_change("audit_rate_limit", &audit_rate_limit, limit, loginuid, sessionid, sid); } -static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sessionid, +static int audit_set_backlog_limit(int limit, kuid_t loginuid, u32 sessionid, u32 sid) { return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit, limit, loginuid, sessionid, sid); } -static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid) +static int audit_set_enabled(int state, kuid_t loginuid, u32 sessionid, u32 sid) { int rc; if (state < AUDIT_OFF || state > AUDIT_LOCKED) @@ -349,7 +349,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid) return rc; } -static int audit_set_failure(int state, uid_t loginuid, u32 sessionid, u32 sid) +static int audit_set_failure(int state, kuid_t loginuid, u32 sessionid, u32 sid) { if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK @@ -607,7 +607,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) } static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, - uid_t auid, u32 ses, u32 sid) + kuid_t auid, u32 ses, u32 sid) { int rc = 0; char *ctx = NULL; @@ -622,7 +622,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u", task_tgid_vnr(current), from_kuid(&init_user_ns, current_uid()), - auid, ses); + from_kuid(&init_user_ns, auid), ses); if (sid) { rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) @@ -644,7 +644,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; - uid_t loginuid; /* loginuid of sender */ + kuid_t loginuid; /* loginuid of sender */ u32 sessionid; struct audit_sig_info *sig_data; char *ctx = NULL; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3823281401b5..1c22ec3d87bc 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -241,7 +241,7 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc struct audit_buffer *ab; ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "auid=%u ses=%u op=", - audit_get_loginuid(current), + from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); audit_log_string(ab, op); audit_log_format(ab, " path="); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b30320cea26f..c4bcdbaf4d4d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1109,7 +1109,7 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q) } /* Log rule additions and removals */ -static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, +static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid, char *action, struct audit_krule *rule, int res) { @@ -1121,7 +1121,8 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (!ab) return; - audit_log_format(ab, "auid=%u ses=%u", loginuid, sessionid); + audit_log_format(ab, "auid=%u ses=%u", + from_kuid(&init_user_ns, loginuid), sessionid); if (sid) { char *ctx = NULL; u32 len; @@ -1152,7 +1153,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int seq, void *data, - size_t datasz, uid_t loginuid, u32 sessionid, u32 sid) + size_t datasz, kuid_t loginuid, u32 sessionid, u32 sid) { struct task_struct *tsk; struct audit_netlink_list *dest; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 0b5b8a232b55..26fdfc092e35 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -149,7 +149,7 @@ struct audit_aux_data_execve { struct audit_aux_data_pids { struct audit_aux_data d; pid_t target_pid[AUDIT_AUX_PIDS]; - uid_t target_auid[AUDIT_AUX_PIDS]; + kuid_t target_auid[AUDIT_AUX_PIDS]; uid_t target_uid[AUDIT_AUX_PIDS]; unsigned int target_sessionid[AUDIT_AUX_PIDS]; u32 target_sid[AUDIT_AUX_PIDS]; @@ -214,7 +214,7 @@ struct audit_context { int arch; pid_t target_pid; - uid_t target_auid; + kuid_t target_auid; uid_t target_uid; unsigned int target_sessionid; u32 target_sid; @@ -1176,7 +1176,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk } static int audit_log_pid_context(struct audit_context *context, pid_t pid, - uid_t auid, uid_t uid, unsigned int sessionid, + kuid_t auid, uid_t uid, unsigned int sessionid, u32 sid, char *comm) { struct audit_buffer *ab; @@ -1188,7 +1188,8 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, if (!ab) return rc; - audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, auid, + audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, + from_kuid(&init_user_ns, auid), uid, sessionid); if (security_secid_to_secctx(sid, &ctx, &len)) { audit_log_format(ab, " obj=(none)"); @@ -1630,7 +1631,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->name_count, context->ppid, context->pid, - tsk->loginuid, + from_kuid(&init_user_ns, tsk->loginuid), context->uid, context->gid, context->euid, context->suid, context->fsuid, @@ -2291,14 +2292,14 @@ static atomic_t session_id = ATOMIC_INIT(0); * * Called (set) from fs/proc/base.c::proc_loginuid_write(). */ -int audit_set_loginuid(uid_t loginuid) +int audit_set_loginuid(kuid_t loginuid) { struct task_struct *task = current; struct audit_context *context = task->audit_context; unsigned int sessionid; #ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE - if (task->loginuid != -1) + if (uid_valid(task->loginuid)) return -EPERM; #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ if (!capable(CAP_AUDIT_CONTROL)) @@ -2315,7 +2316,8 @@ int audit_set_loginuid(uid_t loginuid) "old auid=%u new auid=%u" " old ses=%u new ses=%u", task->pid, task_uid(task), - task->loginuid, loginuid, + from_kuid(&init_user_ns, task->loginuid), + from_kuid(&init_user_ns, loginuid), task->sessionid, sessionid); audit_log_end(ab); } @@ -2543,7 +2545,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (audit_pid && t->tgid == audit_pid) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { audit_sig_pid = tsk->pid; - if (tsk->loginuid != -1) + if (uid_valid(tsk->loginuid)) audit_sig_uid = tsk->loginuid; else audit_sig_uid = uid; diff --git a/net/core/dev.c b/net/core/dev.c index 026bb4a37665..1c0d0823a5a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4524,7 +4524,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), - audit_get_loginuid(current), + from_kuid(&init_user_ns, audit_get_loginuid(current)), from_kuid(&init_user_ns, uid), from_kgid(&init_user_ns, gid), audit_get_sessionid(current)); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..729a345c75a4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ security_task_getsecid(current, &audit_info.secid); - audit_info.loginuid = 0; + audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 9fae63f10298..9650c4ad5f88 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -109,7 +109,7 @@ struct audit_buffer *netlbl_audit_start_common(int type, return NULL; audit_log_format(audit_buf, "netlabel: auid=%u ses=%u", - audit_info->loginuid, + from_kuid(&init_user_ns, audit_info->loginuid), audit_info->sessionid); if (audit_info->secid != 0 && diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..2f475151cea1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2630,12 +2630,12 @@ static void xfrm_policy_fini(struct net *net) flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); @@ -2742,7 +2742,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2757,7 +2757,7 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b3..fce6a49bc7c6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2045,7 +2045,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); @@ -2112,7 +2112,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } void xfrm_audit_state_add(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2127,7 +2127,7 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, EXPORT_SYMBOL_GPL(xfrm_audit_state_add); void xfrm_audit_state_delete(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35c..9ea55db737b4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -575,7 +575,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -654,7 +654,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1369,7 +1369,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1624,7 +1624,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).pid); } } else { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1918,7 +1918,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1961,7 +1961,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_expired(x, ue->hard, current->pid); if (ue->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; -- cgit v1.2.3 From bc0b75f77a944b482293972eb8fd5c88c576eb46 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 17 Sep 2012 22:54:36 -0400 Subject: ext4: do not enable delalloc by default for ext2 Signed-off-by: Brian Foster Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index eb7722ab771c..e6784b3276be 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3411,7 +3411,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * enable delayed allocation by default * Use -o nodelalloc to turn it off */ - if (!IS_EXT3_SB(sb) && + if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) set_opt(sb, DELALLOC); -- cgit v1.2.3 From 90b0a97323f42ead278bbccbdf0e123db2add400 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Mon, 17 Sep 2012 23:39:12 -0400 Subject: ext4: fix possible non-initialized variable in htree_dirblock_to_tree() htree_dirblock_to_tree() declares a non-initialized 'err' variable, which is passed as a reference to another functions expecting them to set this variable with their error codes. It's passed to ext4_bread(), which then passes it to ext4_getblk(). If ext4_map_blocks() returns 0 due to a lookup failure, leaving the ext4_getblk() buffer_head uninitialized, it will make ext4_getblk() return to ext4_bread() without initialize the 'err' variable, and ext4_bread() will return to htree_dirblock_to_tree() with this variable still uninitialized. htree_dirblock_to_tree() will pass this variable with garbage back to ext4_htree_fill_tree(), which expects a number of directory entries added to the rb-tree. which, in case, might return a fake non-zero value due the garbage left in the 'err' variable, leading the kernel to an Oops in ext4_dx_readdir(), once this is expecting a filled rb-tree node, when in turn it will have a NULL-ed one, causing an invalid page request when trying to get a fname struct from this NULL-ed rb-tree node in this line: fname = rb_entry(info->curr_node, struct fname, rb_hash); The patch itself initializes the err variable in htree_dirblock_to_tree() to avoid usage mistakes by the called functions, and also fix ext4_getblk() to return a initialized 'err' variable when ext4_map_blocks() fails a lookup. Signed-off-by: Carlos Maiolino Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 4 +++- fs/ext4/namei.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b4effbda7a96..ca76b5ed6c9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -732,11 +732,13 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, err = ext4_map_blocks(handle, inode, &map, create ? EXT4_GET_BLOCKS_CREATE : 0); + /* ensure we send some value back into *errp */ + *errp = 0; + if (err < 0) *errp = err; if (err <= 0) return NULL; - *errp = 0; bh = sb_getblk(inode->i_sb, map.m_pblk); if (!bh) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7450ff01c3c4..37c03b32e194 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -846,7 +846,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, { struct buffer_head *bh; struct ext4_dir_entry_2 *de, *top; - int err, count = 0; + int err = 0, count = 0; dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); -- cgit v1.2.3 From 2f6f0654ab61961fd0f7701fe3be89ea111f0cda Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 18:52:57 -0800 Subject: userns: Convert vfs posix_acl support to use kuids and kgids - In setxattr if we are setting a posix acl convert uids and gids from the current user namespace into the initial user namespace, before the xattrs are passed to the underlying filesystem. Untranslatable uids and gids are represented as -1 which posix_acl_from_xattr will represent as INVALID_UID or INVALID_GID. posix_acl_valid will fail if an acl from userspace has any INVALID_UID or INVALID_GID values. In net this guarantees that untranslatable posix acls will not be stored by filesystems. - In getxattr if we are reading a posix acl convert uids and gids from the initial user namespace into the current user namespace. Uids and gids that can not be tranlsated into the current user namespace will be represented as -1. - Replace e_id in struct posix_acl_entry with an anymouns union of e_uid and e_gid. For the short term retain the e_id field until all of the users are converted. - Don't set struct posix_acl.e_id in the cases where the acl type does not use e_id. Greatly reducing the use of ACL_UNDEFINED_ID. - Rework the ordering checks in posix_acl_valid so that I use kuid_t and kgid_t types throughout the code, and so that I don't need arithmetic on uid and gid types. Cc: Theodore Tso Cc: Andrew Morton Cc: Andreas Dilger Cc: Jan Kara Cc: Al Viro Signed-off-by: Eric W. Biederman --- fs/posix_acl.c | 30 +++++++------- fs/xattr.c | 7 ++++ fs/xattr_acl.c | 90 ++++++++++++++++++++++++++++++++++++++--- include/linux/posix_acl.h | 8 +++- include/linux/posix_acl_xattr.h | 12 ++++++ 5 files changed, 126 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 5e325a42e33d..8bd2135b7f82 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -78,7 +78,8 @@ posix_acl_valid(const struct posix_acl *acl) { const struct posix_acl_entry *pa, *pe; int state = ACL_USER_OBJ; - unsigned int id = 0; /* keep gcc happy */ + kuid_t prev_uid = INVALID_UID; + kgid_t prev_gid = INVALID_GID; int needs_mask = 0; FOREACH_ACL_ENTRY(pa, acl, pe) { @@ -87,7 +88,6 @@ posix_acl_valid(const struct posix_acl *acl) switch (pa->e_tag) { case ACL_USER_OBJ: if (state == ACL_USER_OBJ) { - id = 0; state = ACL_USER; break; } @@ -96,16 +96,17 @@ posix_acl_valid(const struct posix_acl *acl) case ACL_USER: if (state != ACL_USER) return -EINVAL; - if (pa->e_id == ACL_UNDEFINED_ID || - pa->e_id < id) + if (!uid_valid(pa->e_uid)) return -EINVAL; - id = pa->e_id + 1; + if (uid_valid(prev_uid) && + uid_lte(pa->e_uid, prev_uid)) + return -EINVAL; + prev_uid = pa->e_uid; needs_mask = 1; break; case ACL_GROUP_OBJ: if (state == ACL_USER) { - id = 0; state = ACL_GROUP; break; } @@ -114,10 +115,12 @@ posix_acl_valid(const struct posix_acl *acl) case ACL_GROUP: if (state != ACL_GROUP) return -EINVAL; - if (pa->e_id == ACL_UNDEFINED_ID || - pa->e_id < id) + if (!gid_valid(pa->e_gid)) + return -EINVAL; + if (gid_valid(prev_gid) && + gid_lte(pa->e_gid, prev_gid)) return -EINVAL; - id = pa->e_id + 1; + prev_gid = pa->e_gid; needs_mask = 1; break; @@ -195,15 +198,12 @@ posix_acl_from_mode(umode_t mode, gfp_t flags) return ERR_PTR(-ENOMEM); acl->a_entries[0].e_tag = ACL_USER_OBJ; - acl->a_entries[0].e_id = ACL_UNDEFINED_ID; acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6; acl->a_entries[1].e_tag = ACL_GROUP_OBJ; - acl->a_entries[1].e_id = ACL_UNDEFINED_ID; acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3; acl->a_entries[2].e_tag = ACL_OTHER; - acl->a_entries[2].e_id = ACL_UNDEFINED_ID; acl->a_entries[2].e_perm = (mode & S_IRWXO); return acl; } @@ -224,11 +224,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) switch(pa->e_tag) { case ACL_USER_OBJ: /* (May have been checked already) */ - if (inode->i_uid == current_fsuid()) + if (uid_eq(inode->i_uid, current_fsuid())) goto check_perm; break; case ACL_USER: - if (pa->e_id == current_fsuid()) + if (uid_eq(pa->e_uid, current_fsuid())) goto mask; break; case ACL_GROUP_OBJ: @@ -239,7 +239,7 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) } break; case ACL_GROUP: - if (in_group_p(pa->e_id)) { + if (in_group_p(pa->e_gid)) { found = 1; if ((pa->e_perm & want) == want) goto mask; diff --git a/fs/xattr.c b/fs/xattr.c index 4d45b7189e7e..c111745c2da9 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -347,6 +348,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, error = -EFAULT; goto out; } + if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || + (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) + posix_acl_fix_xattr_from_user(kvalue, size); } error = vfs_setxattr(d, kname, kvalue, size, flags); @@ -450,6 +454,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, error = vfs_getxattr(d, kname, kvalue, size); if (error > 0) { + if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || + (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) + posix_acl_fix_xattr_to_user(kvalue, size); if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index 69d06b07b169..bf472ca1b348 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c @@ -9,7 +9,65 @@ #include #include #include +#include +/* + * Fix up the uids and gids in posix acl extended attributes in place. + */ +static void posix_acl_fix_xattr_userns( + struct user_namespace *to, struct user_namespace *from, + void *value, size_t size) +{ + posix_acl_xattr_header *header = (posix_acl_xattr_header *)value; + posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end; + int count; + kuid_t uid; + kgid_t gid; + + if (!value) + return; + if (size < sizeof(posix_acl_xattr_header)) + return; + if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) + return; + + count = posix_acl_xattr_count(size); + if (count < 0) + return; + if (count == 0) + return; + + for (end = entry + count; entry != end; entry++) { + switch(le16_to_cpu(entry->e_tag)) { + case ACL_USER: + uid = make_kuid(from, le32_to_cpu(entry->e_id)); + entry->e_id = cpu_to_le32(from_kuid(to, uid)); + break; + case ACL_GROUP: + gid = make_kgid(from, le32_to_cpu(entry->e_id)); + entry->e_id = cpu_to_le32(from_kuid(to, uid)); + break; + default: + break; + } + } +} + +void posix_acl_fix_xattr_from_user(void *value, size_t size) +{ + struct user_namespace *user_ns = current_user_ns(); + if (user_ns == &init_user_ns) + return; + posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size); +} + +void posix_acl_fix_xattr_to_user(void *value, size_t size) +{ + struct user_namespace *user_ns = current_user_ns(); + if (user_ns == &init_user_ns) + return; + posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size); +} /* * Convert from extended attribute to in-memory representation. @@ -50,12 +108,21 @@ posix_acl_from_xattr(const void *value, size_t size) case ACL_GROUP_OBJ: case ACL_MASK: case ACL_OTHER: - acl_e->e_id = ACL_UNDEFINED_ID; break; case ACL_USER: + acl_e->e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + if (!uid_valid(acl_e->e_uid)) + goto fail; + break; case ACL_GROUP: - acl_e->e_id = le32_to_cpu(entry->e_id); + acl_e->e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); + if (!gid_valid(acl_e->e_gid)) + goto fail; break; default: @@ -89,9 +156,22 @@ posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size) ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); for (n=0; n < acl->a_count; n++, ext_entry++) { - ext_entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - ext_entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - ext_entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + const struct posix_acl_entry *acl_e = &acl->a_entries[n]; + ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); + ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); + switch(acl_e->e_tag) { + case ACL_USER: + ext_entry->e_id = + cpu_to_le32(from_kuid(&init_user_ns, acl_e->e_uid)); + break; + case ACL_GROUP: + ext_entry->e_id = + cpu_to_le32(from_kgid(&init_user_ns, acl_e->e_gid)); + break; + default: + ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); + break; + } } return real_size; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 11bad91c4433..7931efe71175 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -36,7 +36,13 @@ struct posix_acl_entry { short e_tag; unsigned short e_perm; - unsigned int e_id; + union { + kuid_t e_uid; + kgid_t e_gid; +#ifndef CONFIG_UIDGID_STRICT_TYPE_CHECKS + unsigned int e_id; +#endif + }; }; struct posix_acl { diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 6e53c34035cd..8bd5fcf06916 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -52,6 +52,18 @@ posix_acl_xattr_count(size_t size) return size / sizeof(posix_acl_xattr_entry); } +#ifdef CONFIG_FS_POSIX_ACL +void posix_acl_fix_xattr_from_user(void *value, size_t size); +void posix_acl_fix_xattr_to_user(void *value, size_t size); +#else +static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) +{ +} +static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) +{ +} +#endif + struct posix_acl *posix_acl_from_xattr(const void *value, size_t size); int posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size); -- cgit v1.2.3 From 5f3a4a28ec140a90e6058d1d09f6b1f235d485e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 20:17:44 -0700 Subject: userns: Pass a userns parameter into posix_acl_to_xattr and posix_acl_from_xattr - Pass the user namespace the uid and gid values in the xattr are stored in into posix_acl_from_xattr. - Pass the user namespace kuid and kgid values should be converted into when storing uid and gid values in an xattr in posix_acl_to_xattr. - Modify all callers of posix_acl_from_xattr and posix_acl_to_xattr to pass in &init_user_ns. In the short term this change is not strictly needed but it makes the code clearer. In the longer term this change is necessary to be able to mount filesystems outside of the initial user namespace that natively store posix acls in the linux xattr format. Cc: Theodore Tso Cc: Andrew Morton Cc: Andreas Dilger Cc: Jan Kara Cc: Al Viro Signed-off-by: "Eric W. Biederman" --- fs/9p/acl.c | 8 ++++---- fs/btrfs/acl.c | 8 ++++---- fs/ext2/acl.c | 4 ++-- fs/ext3/acl.c | 4 ++-- fs/ext4/acl.c | 4 ++-- fs/generic_acl.c | 4 ++-- fs/gfs2/acl.c | 14 +++++++------- fs/jffs2/acl.c | 4 ++-- fs/jfs/acl.c | 4 ++-- fs/jfs/xattr.c | 4 ++-- fs/nfs/nfs3acl.c | 4 ++-- fs/nfsd/vfs.c | 8 ++++---- fs/ocfs2/acl.c | 4 ++-- fs/reiserfs/xattr_acl.c | 4 ++-- fs/xattr_acl.c | 14 ++++++++------ fs/xfs/xfs_acl.c | 4 ++-- include/linux/posix_acl_xattr.h | 6 ++++-- 17 files changed, 53 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 9a1d42630751..15b679166201 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -37,7 +37,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) return ERR_PTR(-ENOMEM); size = v9fs_fid_xattr_get(fid, name, value, size); if (size > 0) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) goto err_out; } @@ -131,7 +131,7 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl) buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return -ENOMEM; - retval = posix_acl_to_xattr(acl, buffer, size); + retval = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); if (retval < 0) goto err_free_out; switch (type) { @@ -251,7 +251,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return error; @@ -304,7 +304,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, return -EPERM; if (value) { /* update the cached acl value */ - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); else if (acl) { diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 761e2cd8fed1..0c16e3dbfd56 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -61,7 +61,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) size = __btrfs_getxattr(inode, name, value, size); } if (size > 0) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); } else if (size == -ENOENT || size == -ENODATA || size == 0) { /* FIXME, who returns -ENOENT? I think nobody */ acl = NULL; @@ -91,7 +91,7 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - ret = posix_acl_to_xattr(acl, value, size); + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); posix_acl_release(acl); return ret; @@ -141,7 +141,7 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, goto out; } - ret = posix_acl_to_xattr(acl, value, size); + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (ret < 0) goto out; } @@ -169,7 +169,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, return -EOPNOTSUPP; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 35d6a3cfd9ff..70bb1bccc957 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -350,7 +350,7 @@ ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return error; @@ -371,7 +371,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, return -EPERM; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); else if (acl) { diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index c76832c8d192..2cf6a8044c80 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -369,7 +369,7 @@ ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return error; @@ -392,7 +392,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, return -EPERM; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); else if (acl) { diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index a5c29bb3b835..42b95fccfb2f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -374,7 +374,7 @@ ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return error; @@ -397,7 +397,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, return -EPERM; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); else if (acl) { diff --git a/fs/generic_acl.c b/fs/generic_acl.c index d0dddaceac59..b3f3676796d3 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -56,7 +56,7 @@ generic_acl_get(struct dentry *dentry, const char *name, void *buffer, acl = get_cached_acl(dentry->d_inode, type); if (!acl) return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return error; @@ -77,7 +77,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); } diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index bd4a5892c93c..f850020ad906 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -63,7 +63,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) if (len == 0) return NULL; - acl = posix_acl_from_xattr(data, len); + acl = posix_acl_from_xattr(&init_user_ns, data, len); kfree(data); return acl; } @@ -88,13 +88,13 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) const char *name = gfs2_acl_name(type); BUG_ON(name == NULL); - len = posix_acl_to_xattr(acl, NULL, 0); + len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); if (len == 0) return 0; data = kmalloc(len, GFP_NOFS); if (data == NULL) return -ENOMEM; - error = posix_acl_to_xattr(acl, data, len); + error = posix_acl_to_xattr(&init_user_ns, acl, data, len); if (error < 0) goto out; error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); @@ -166,12 +166,12 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) if (error) return error; - len = posix_acl_to_xattr(acl, NULL, 0); + len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); data = kmalloc(len, GFP_NOFS); error = -ENOMEM; if (data == NULL) goto out; - posix_acl_to_xattr(acl, data, len); + posix_acl_to_xattr(&init_user_ns, acl, data, len); error = gfs2_xattr_acl_chmod(ip, attr, data); kfree(data); set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); @@ -212,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name, if (acl == NULL) return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return error; @@ -245,7 +245,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, if (!value) goto set_acl; - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (!acl) { /* * acl_set_file(3) may request that we set default ACLs with diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 922f146e4235..42e4edc17a90 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -363,7 +363,7 @@ static int jffs2_acl_getxattr(struct dentry *dentry, const char *name, return PTR_ERR(acl); if (!acl) return -ENODATA; - rc = posix_acl_to_xattr(acl, buffer, size); + rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return rc; @@ -381,7 +381,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name, return -EPERM; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 45559dc3ea2f..d254d6d35995 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -64,7 +64,7 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type) else acl = ERR_PTR(size); } else { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); } kfree(value); if (!IS_ERR(acl)) @@ -100,7 +100,7 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; - rc = posix_acl_to_xattr(acl, value, size); + rc = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (rc < 0) goto out; } diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 26683e15b3ac..42d67f9757bf 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -685,7 +685,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, * POSIX_ACL_XATTR_ACCESS is tied to i_mode */ if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { - acl = posix_acl_from_xattr(value, value_len); + acl = posix_acl_from_xattr(&init_user_ns, value, value_len); if (IS_ERR(acl)) { rc = PTR_ERR(acl); printk(KERN_ERR "posix_acl_from_xattr returned %d\n", @@ -710,7 +710,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return 0; } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { - acl = posix_acl_from_xattr(value, value_len); + acl = posix_acl_from_xattr(&init_user_ns, value, value_len); if (IS_ERR(acl)) { rc = PTR_ERR(acl); printk(KERN_ERR "posix_acl_from_xattr returned %d\n", diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index e4498dc351a8..4a1aafba6a20 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -70,7 +70,7 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, if (type == ACL_TYPE_ACCESS && acl->a_count == 0) error = -ENODATA; else - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); } else error = -ENODATA; @@ -92,7 +92,7 @@ int nfs3_setxattr(struct dentry *dentry, const char *name, else return -EOPNOTSUPP; - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); error = nfs3_proc_setacl(inode, type, acl); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a9269f142cc4..3f67b8e12251 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -480,7 +480,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) if (buf == NULL) goto out; - len = posix_acl_to_xattr(pacl, buf, buflen); + len = posix_acl_to_xattr(&init_user_ns, pacl, buf, buflen); if (len < 0) { error = len; goto out; @@ -549,7 +549,7 @@ _get_posix_acl(struct dentry *dentry, char *key) if (buflen <= 0) return ERR_PTR(buflen); - pacl = posix_acl_from_xattr(buf, buflen); + pacl = posix_acl_from_xattr(&init_user_ns, buf, buflen); kfree(buf); return pacl; } @@ -2264,7 +2264,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type) if (size < 0) return ERR_PTR(size); - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); kfree(value); return acl; } @@ -2297,7 +2297,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; - error = posix_acl_to_xattr(acl, value, size); + error = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (error < 0) goto getout; size = error; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a7219075b4de..260b16281fc3 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -452,7 +452,7 @@ static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - ret = posix_acl_to_xattr(acl, buffer, size); + ret = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return ret; @@ -475,7 +475,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name, return -EPERM; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); else if (acl) { diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 44474f9b990d..87d6911c659d 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -30,7 +30,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value, return -EPERM; if (value) { - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) { return PTR_ERR(acl); } else if (acl) { @@ -77,7 +77,7 @@ posix_acl_get(struct dentry *dentry, const char *name, void *buffer, return PTR_ERR(acl); if (acl == NULL) return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return error; diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index bf472ca1b348..11efd830b5f5 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c @@ -73,7 +73,8 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size) * Convert from extended attribute to in-memory representation. */ struct posix_acl * -posix_acl_from_xattr(const void *value, size_t size) +posix_acl_from_xattr(struct user_namespace *user_ns, + const void *value, size_t size) { posix_acl_xattr_header *header = (posix_acl_xattr_header *)value; posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end; @@ -112,14 +113,14 @@ posix_acl_from_xattr(const void *value, size_t size) case ACL_USER: acl_e->e_uid = - make_kuid(&init_user_ns, + make_kuid(user_ns, le32_to_cpu(entry->e_id)); if (!uid_valid(acl_e->e_uid)) goto fail; break; case ACL_GROUP: acl_e->e_gid = - make_kgid(&init_user_ns, + make_kgid(user_ns, le32_to_cpu(entry->e_id)); if (!gid_valid(acl_e->e_gid)) goto fail; @@ -141,7 +142,8 @@ EXPORT_SYMBOL (posix_acl_from_xattr); * Convert from in-memory to extended attribute representation. */ int -posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size) +posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, + void *buffer, size_t size) { posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; posix_acl_xattr_entry *ext_entry = ext_acl->a_entries; @@ -162,11 +164,11 @@ posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size) switch(acl_e->e_tag) { case ACL_USER: ext_entry->e_id = - cpu_to_le32(from_kuid(&init_user_ns, acl_e->e_uid)); + cpu_to_le32(from_kuid(user_ns, acl_e->e_uid)); break; case ACL_GROUP: ext_entry->e_id = - cpu_to_le32(from_kgid(&init_user_ns, acl_e->e_gid)); + cpu_to_le32(from_kgid(user_ns, acl_e->e_gid)); break; default: ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index ac702a6eab9b..1d32f1d52763 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -337,7 +337,7 @@ xfs_xattr_acl_get(struct dentry *dentry, const char *name, if (acl == NULL) return -ENODATA; - error = posix_acl_to_xattr(acl, value, size); + error = posix_acl_to_xattr(&init_user_ns, acl, value, size); posix_acl_release(acl); return error; @@ -361,7 +361,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, if (!value) goto set_acl; - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (!acl) { /* * acl_set_file(3) may request that we set default ACLs with diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 8bd5fcf06916..ad93ad0f1db0 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -64,7 +64,9 @@ static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) } #endif -struct posix_acl *posix_acl_from_xattr(const void *value, size_t size); -int posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size); +struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, + const void *value, size_t size); +int posix_acl_to_xattr(struct user_namespace *user_ns, + const struct posix_acl *acl, void *buffer, size_t size); #endif /* _POSIX_ACL_XATTR_H */ -- cgit v1.2.3 From af84df93ffe3603fc6fc40a4338f9e740aad3b4e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 20:44:54 -0700 Subject: userns: Convert extN to support kuids and kgids in posix acls Convert ext2, ext3, and ext4 to fully support the posix acl changes, using e_uid e_gid instead e_id. Enabled building with posix acls enabled, all filesystems supporting user namespaces, now also support posix acls when user namespaces are enabled. Cc: Theodore Tso Cc: Andrew Morton Cc: Andreas Dilger Cc: Jan Kara Cc: Al Viro Signed-off-by: Eric W. Biederman --- fs/ext2/acl.c | 28 ++++++++++++++++++++-------- fs/ext3/acl.c | 28 ++++++++++++++++++++-------- fs/ext4/acl.c | 27 ++++++++++++++++++++------- init/Kconfig | 1 - 4 files changed, 60 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 70bb1bccc957..110b6b371a4e 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -53,16 +53,23 @@ ext2_acl_from_disk(const void *value, size_t size) case ACL_OTHER: value = (char *)value + sizeof(ext2_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; break; case ACL_USER: + value = (char *)value + sizeof(ext2_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + break; case ACL_GROUP: value = (char *)value + sizeof(ext2_acl_entry); if ((char *)value > end) goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); + acl->a_entries[n].e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); break; default: @@ -96,14 +103,19 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); e = (char *)ext_acl + sizeof(ext2_acl_header); for (n=0; n < acl->a_count; n++) { + const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext2_acl_entry *entry = (ext2_acl_entry *)e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch(acl->a_entries[n].e_tag) { + entry->e_tag = cpu_to_le16(acl_e->e_tag); + entry->e_perm = cpu_to_le16(acl_e->e_perm); + switch(acl_e->e_tag) { case ACL_USER: + entry->e_id = cpu_to_le32( + from_kuid(&init_user_ns, acl_e->e_uid)); + e += sizeof(ext2_acl_entry); + break; case ACL_GROUP: - entry->e_id = - cpu_to_le32(acl->a_entries[n].e_id); + entry->e_id = cpu_to_le32( + from_kgid(&init_user_ns, acl_e->e_gid)); e += sizeof(ext2_acl_entry); break; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 2cf6a8044c80..dbb5ad59a7fc 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -48,16 +48,23 @@ ext3_acl_from_disk(const void *value, size_t size) case ACL_OTHER: value = (char *)value + sizeof(ext3_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; break; case ACL_USER: + value = (char *)value + sizeof(ext3_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + break; case ACL_GROUP: value = (char *)value + sizeof(ext3_acl_entry); if ((char *)value > end) goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); + acl->a_entries[n].e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); break; default: @@ -91,14 +98,19 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); e = (char *)ext_acl + sizeof(ext3_acl_header); for (n=0; n < acl->a_count; n++) { + const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext3_acl_entry *entry = (ext3_acl_entry *)e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch(acl->a_entries[n].e_tag) { + entry->e_tag = cpu_to_le16(acl_e->e_tag); + entry->e_perm = cpu_to_le16(acl_e->e_perm); + switch(acl_e->e_tag) { case ACL_USER: + entry->e_id = cpu_to_le32( + from_kuid(&init_user_ns, acl_e->e_uid)); + e += sizeof(ext3_acl_entry); + break; case ACL_GROUP: - entry->e_id = - cpu_to_le32(acl->a_entries[n].e_id); + entry->e_id = cpu_to_le32( + from_kgid(&init_user_ns, acl_e->e_gid)); e += sizeof(ext3_acl_entry); break; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 42b95fccfb2f..d3c5b88fd89f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -55,16 +55,23 @@ ext4_acl_from_disk(const void *value, size_t size) case ACL_OTHER: value = (char *)value + sizeof(ext4_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; break; case ACL_USER: + value = (char *)value + sizeof(ext4_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + break; case ACL_GROUP: value = (char *)value + sizeof(ext4_acl_entry); if ((char *)value > end) goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); + acl->a_entries[n].e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); break; default: @@ -98,13 +105,19 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); e = (char *)ext_acl + sizeof(ext4_acl_header); for (n = 0; n < acl->a_count; n++) { + const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext4_acl_entry *entry = (ext4_acl_entry *)e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch (acl->a_entries[n].e_tag) { + entry->e_tag = cpu_to_le16(acl_e->e_tag); + entry->e_perm = cpu_to_le16(acl_e->e_perm); + switch (acl_e->e_tag) { case ACL_USER: + entry->e_id = cpu_to_le32( + from_kuid(&init_user_ns, acl_e->e_uid)); + e += sizeof(ext4_acl_entry); + break; case ACL_GROUP: - entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + entry->e_id = cpu_to_le32( + from_kgid(&init_user_ns, acl_e->e_gid)); e += sizeof(ext4_acl_entry); break; diff --git a/init/Kconfig b/init/Kconfig index 2a388e569a28..ed6310b6042b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -927,7 +927,6 @@ config UIDGID_CONVERTED # Features depends on IMA = n depends on EVM = n - depends on FS_POSIX_ACL = n depends on QUOTA = n depends on QUOTACTL = n -- cgit v1.2.3 From 69552c0c50f3f950f304fb07a4320e46f7f60c21 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:28:09 -0800 Subject: userns: Convert configfs to use kuid and kgid where appropriate Cc: Joel Becker Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/configfs/inode.c | 4 ++-- init/Kconfig | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 0074362d9f7f..a9d35b0e06cf 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -79,8 +79,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) return -ENOMEM; /* assign default attributes */ sd_iattr->ia_mode = sd->s_mode; - sd_iattr->ia_uid = 0; - sd_iattr->ia_gid = 0; + sd_iattr->ia_uid = GLOBAL_ROOT_UID; + sd_iattr->ia_gid = GLOBAL_ROOT_GID; sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; sd->s_iattr = sd_iattr; } diff --git a/init/Kconfig b/init/Kconfig index ed6310b6042b..33d231cd3cc6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -950,7 +950,6 @@ config UIDGID_CONVERTED depends on CEPH_FS = n depends on CIFS = n depends on CODA_FS = n - depends on CONFIGFS_FS = n depends on CRAMFS = n depends on ECRYPT_FS = n depends on EFS_FS = n -- cgit v1.2.3 From f76d207a66c3a53defea67e7d36c3eb1b7d6d61d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 30 Aug 2012 01:24:05 -0700 Subject: userns: Add kprojid_t and associated infrastructure in projid.h Implement kprojid_t a cousin of the kuid_t and kgid_t. The per user namespace mapping of project id values can be set with /proc//projid_map. A full compliment of helpers is provided: make_kprojid, from_kprojid, from_kprojid_munged, kporjid_has_mapping, projid_valid, projid_eq, projid_eq, projid_lt. Project identifiers are part of the generic disk quota interface, although it appears only xfs implements project identifiers currently. The xfs code allows anyone who has permission to set the project identifier on a file to use any project identifier so when setting up the user namespace project identifier mappings I do not require a capability. Cc: Dave Chinner Cc: Jan Kara Signed-off-by: "Eric W. Biederman" --- fs/proc/base.c | 15 +++++ include/linux/projid.h | 104 +++++++++++++++++++++++++++++++++ include/linux/user_namespace.h | 3 + kernel/user.c | 8 +++ kernel/user_namespace.c | 128 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 include/linux/projid.h (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 138cff4b05dd..acd1960c28a2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2991,6 +2991,11 @@ static int proc_gid_map_open(struct inode *inode, struct file *file) return proc_id_map_open(inode, file, &proc_gid_seq_operations); } +static int proc_projid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_projid_seq_operations); +} + static const struct file_operations proc_uid_map_operations = { .open = proc_uid_map_open, .write = proc_uid_map_write, @@ -3006,6 +3011,14 @@ static const struct file_operations proc_gid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static const struct file_operations proc_projid_map_operations = { + .open = proc_projid_map_open, + .write = proc_projid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -3113,6 +3126,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), #endif }; @@ -3476,6 +3490,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), #endif }; diff --git a/include/linux/projid.h b/include/linux/projid.h new file mode 100644 index 000000000000..36517b95be5c --- /dev/null +++ b/include/linux/projid.h @@ -0,0 +1,104 @@ +#ifndef _LINUX_PROJID_H +#define _LINUX_PROJID_H + +/* + * A set of types for the internal kernel types representing project ids. + * + * The types defined in this header allow distinguishing which project ids in + * the kernel are values used by userspace and which project id values are + * the internal kernel values. With the addition of user namespaces the values + * can be different. Using the type system makes it possible for the compiler + * to detect when we overlook these differences. + * + */ +#include + +struct user_namespace; +extern struct user_namespace init_user_ns; + +typedef __kernel_uid32_t projid_t; + +#ifdef CONFIG_UIDGID_STRICT_TYPE_CHECKS + +typedef struct { + projid_t val; +} kprojid_t; + +static inline projid_t __kprojid_val(kprojid_t projid) +{ + return projid.val; +} + +#define KPROJIDT_INIT(value) (kprojid_t){ value } + +#else + +typedef projid_t kprojid_t; + +static inline projid_t __kprojid_val(kprojid_t projid) +{ + return projid; +} + +#define KPROJIDT_INIT(value) ((kprojid_t) value ) + +#endif + +#define INVALID_PROJID KPROJIDT_INIT(-1) +#define OVERFLOW_PROJID 65534 + +static inline bool projid_eq(kprojid_t left, kprojid_t right) +{ + return __kprojid_val(left) == __kprojid_val(right); +} + +static inline bool projid_lt(kprojid_t left, kprojid_t right) +{ + return __kprojid_val(left) < __kprojid_val(right); +} + +static inline bool projid_valid(kprojid_t projid) +{ + return !projid_eq(projid, INVALID_PROJID); +} + +#ifdef CONFIG_USER_NS + +extern kprojid_t make_kprojid(struct user_namespace *from, projid_t projid); + +extern projid_t from_kprojid(struct user_namespace *to, kprojid_t projid); +extern projid_t from_kprojid_munged(struct user_namespace *to, kprojid_t projid); + +static inline bool kprojid_has_mapping(struct user_namespace *ns, kprojid_t projid) +{ + return from_kprojid(ns, projid) != (projid_t)-1; +} + +#else + +static inline kprojid_t make_kprojid(struct user_namespace *from, projid_t projid) +{ + return KPROJIDT_INIT(projid); +} + +static inline projid_t from_kprojid(struct user_namespace *to, kprojid_t kprojid) +{ + return __kprojid_val(kprojid); +} + +static inline projid_t from_kprojid_munged(struct user_namespace *to, kprojid_t kprojid) +{ + projid_t projid = from_kprojid(to, kprojid); + if (projid == (projid_t)-1) + projid = OVERFLOW_PROJID; + return projid; +} + +static inline bool kprojid_has_mapping(struct user_namespace *ns, kprojid_t projid) +{ + return true; +} + +#endif /* CONFIG_USER_NS */ + +#endif /* _LINUX_PROJID_H */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4e72922e5a75..95142cae446a 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -20,6 +20,7 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; + struct uid_gid_map projid_map; struct kref kref; struct user_namespace *parent; kuid_t owner; @@ -49,8 +50,10 @@ static inline void put_user_ns(struct user_namespace *ns) struct seq_operations; extern struct seq_operations proc_uid_seq_operations; extern struct seq_operations proc_gid_seq_operations; +extern struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) diff --git a/kernel/user.c b/kernel/user.c index b815fefbe76f..750acffbe9ec 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -38,6 +38,14 @@ struct user_namespace init_user_ns = { .count = 4294967295U, }, }, + .projid_map = { + .nr_extents = 1, + .extent[0] = { + .first = 0, + .lower_first = 0, + .count = 4294967295U, + }, + }, .kref = { .refcount = ATOMIC_INIT(3), }, diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86602316422d..456a6b9fba34 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -19,6 +19,7 @@ #include #include #include +#include static struct kmem_cache *user_ns_cachep __read_mostly; @@ -295,6 +296,75 @@ gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) } EXPORT_SYMBOL(from_kgid_munged); +/** + * make_kprojid - Map a user-namespace projid pair into a kprojid. + * @ns: User namespace that the projid is in + * @projid: Project identifier + * + * Maps a user-namespace uid pair into a kernel internal kuid, + * and returns that kuid. + * + * When there is no mapping defined for the user-namespace projid + * pair INVALID_PROJID is returned. Callers are expected to test + * for and handle handle INVALID_PROJID being returned. INVALID_PROJID + * may be tested for using projid_valid(). + */ +kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid) +{ + /* Map the uid to a global kernel uid */ + return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); +} +EXPORT_SYMBOL(make_kprojid); + +/** + * from_kprojid - Create a projid from a kprojid user-namespace pair. + * @targ: The user namespace we want a projid in. + * @kprojid: The kernel internal project identifier to start with. + * + * Map @kprojid into the user-namespace specified by @targ and + * return the resulting projid. + * + * There is always a mapping into the initial user_namespace. + * + * If @kprojid has no mapping in @targ (projid_t)-1 is returned. + */ +projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) +{ + /* Map the uid from a global kernel uid */ + return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); +} +EXPORT_SYMBOL(from_kprojid); + +/** + * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair. + * @targ: The user namespace we want a projid in. + * @kprojid: The kernel internal projid to start with. + * + * Map @kprojid into the user-namespace specified by @targ and + * return the resulting projid. + * + * There is always a mapping into the initial user_namespace. + * + * Unlike from_kprojid from_kprojid_munged never fails and always + * returns a valid projid. This makes from_kprojid_munged + * appropriate for use in syscalls like stat and where + * failing the system call and failing to provide a valid projid are + * not an options. + * + * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned. + */ +projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) +{ + projid_t projid; + projid = from_kprojid(targ, kprojid); + + if (projid == (projid_t) -1) + projid = OVERFLOW_PROJID; + return projid; +} +EXPORT_SYMBOL(from_kprojid_munged); + + static int uid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; @@ -337,6 +407,27 @@ static int gid_m_show(struct seq_file *seq, void *v) return 0; } +static int projid_m_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + struct uid_gid_extent *extent = v; + struct user_namespace *lower_ns; + projid_t lower; + + lower_ns = seq_user_ns(seq); + if ((lower_ns == ns) && lower_ns->parent) + lower_ns = lower_ns->parent; + + lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); + + seq_printf(seq, "%10u %10u %10u\n", + extent->first, + lower, + extent->count); + + return 0; +} + static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) { struct uid_gid_extent *extent = NULL; @@ -362,6 +453,13 @@ static void *gid_m_start(struct seq_file *seq, loff_t *ppos) return m_start(seq, ppos, &ns->gid_map); } +static void *projid_m_start(struct seq_file *seq, loff_t *ppos) +{ + struct user_namespace *ns = seq->private; + + return m_start(seq, ppos, &ns->projid_map); +} + static void *m_next(struct seq_file *seq, void *v, loff_t *pos) { (*pos)++; @@ -387,6 +485,13 @@ struct seq_operations proc_gid_seq_operations = { .show = gid_m_show, }; +struct seq_operations proc_projid_seq_operations = { + .start = projid_m_start, + .stop = m_stop, + .next = m_next, + .show = projid_m_show, +}; + static DEFINE_MUTEX(id_map_mutex); static ssize_t map_write(struct file *file, const char __user *buf, @@ -434,7 +539,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* Require the appropriate privilege CAP_SETUID or CAP_SETGID * over the user namespace in order to set the id mapping. */ - if (!ns_capable(ns, cap_setid)) + if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) goto out; /* Get a buffer */ @@ -584,9 +689,30 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz &ns->gid_map, &ns->parent->gid_map); } +ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + struct user_namespace *seq_ns = seq_user_ns(seq); + + if (!ns->parent) + return -EPERM; + + if ((seq_ns != ns) && (seq_ns != ns->parent)) + return -EPERM; + + /* Anyone can set any valid project id no capability needed */ + return map_write(file, buf, size, ppos, -1, + &ns->projid_map, &ns->parent->projid_map); +} + static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { + /* Allow anyone to set a mapping that doesn't require privilege */ + if (!cap_valid(cap_setid)) + return true; + /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. */ -- cgit v1.2.3 From e8a3e4719b7ec19288c56f22623f537cb78885c1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Sep 2012 01:11:45 -0700 Subject: userns: Implement struct kqid Add the data type struct kqid which holds the kernel internal form of the owning identifier of a quota. struct kqid is a replacement for the implicit union of uid, gid and project id stored in an unsigned int and the quota type field that is was used in the quota data structures. Making the data type explicit allows the kuid_t and kgid_t type safety to propogate more thoroughly through the code, revealing more places where uid/gid conversions need be made. Along with the data type struct kqid comes the helper functions qid_eq, qid_lt, from_kqid, from_kqid_munged, qid_valid, make_kqid, make_kqid_invalid, make_kqid_uid, make_kqid_gid. Cc: Jan Kara Cc: Dave Chinner Signed-off-by: "Eric W. Biederman" --- fs/quota/Makefile | 2 +- fs/quota/kqid.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/quota.h | 125 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 fs/quota/kqid.c (limited to 'fs') diff --git a/fs/quota/Makefile b/fs/quota/Makefile index 5f9e9e276af0..c66c37cdaa39 100644 --- a/fs/quota/Makefile +++ b/fs/quota/Makefile @@ -2,6 +2,6 @@ obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o obj-$(CONFIG_QFMT_V2) += quota_v2.o obj-$(CONFIG_QUOTA_TREE) += quota_tree.o -obj-$(CONFIG_QUOTACTL) += quota.o +obj-$(CONFIG_QUOTACTL) += quota.o kqid.o obj-$(CONFIG_QUOTACTL_COMPAT) += compat.o obj-$(CONFIG_QUOTA_NETLINK_INTERFACE) += netlink.o diff --git a/fs/quota/kqid.c b/fs/quota/kqid.c new file mode 100644 index 000000000000..2f97b0e2c501 --- /dev/null +++ b/fs/quota/kqid.c @@ -0,0 +1,132 @@ +#include +#include +#include + +/** + * qid_eq - Test to see if to kquid values are the same + * @left: A qid value + * @right: Another quid value + * + * Return true if the two qid values are equal and false otherwise. + */ +bool qid_eq(struct kqid left, struct kqid right) +{ + if (left.type != right.type) + return false; + switch(left.type) { + case USRQUOTA: + return uid_eq(left.uid, right.uid); + case GRPQUOTA: + return gid_eq(left.gid, right.gid); + case PRJQUOTA: + return projid_eq(left.projid, right.projid); + default: + BUG(); + } +} +EXPORT_SYMBOL(qid_eq); + +/** + * qid_lt - Test to see if one qid value is less than another + * @left: The possibly lesser qid value + * @right: The possibly greater qid value + * + * Return true if left is less than right and false otherwise. + */ +bool qid_lt(struct kqid left, struct kqid right) +{ + if (left.type < right.type) + return true; + if (left.type > right.type) + return false; + switch (left.type) { + case USRQUOTA: + return uid_lt(left.uid, right.uid); + case GRPQUOTA: + return gid_lt(left.gid, right.gid); + case PRJQUOTA: + return projid_lt(left.projid, right.projid); + default: + BUG(); + } +} +EXPORT_SYMBOL(qid_lt); + +/** + * from_kqid - Create a qid from a kqid user-namespace pair. + * @targ: The user namespace we want a qid in. + * @kuid: The kernel internal quota identifier to start with. + * + * Map @kqid into the user-namespace specified by @targ and + * return the resulting qid. + * + * There is always a mapping into the initial user_namespace. + * + * If @kqid has no mapping in @targ (qid_t)-1 is returned. + */ +qid_t from_kqid(struct user_namespace *targ, struct kqid kqid) +{ + switch (kqid.type) { + case USRQUOTA: + return from_kuid(targ, kqid.uid); + case GRPQUOTA: + return from_kgid(targ, kqid.gid); + case PRJQUOTA: + return from_kprojid(targ, kqid.projid); + default: + BUG(); + } +} +EXPORT_SYMBOL(from_kqid); + +/** + * from_kqid_munged - Create a qid from a kqid user-namespace pair. + * @targ: The user namespace we want a qid in. + * @kqid: The kernel internal quota identifier to start with. + * + * Map @kqid into the user-namespace specified by @targ and + * return the resulting qid. + * + * There is always a mapping into the initial user_namespace. + * + * Unlike from_kqid from_kqid_munged never fails and always + * returns a valid projid. This makes from_kqid_munged + * appropriate for use in places where failing to provide + * a qid_t is not a good option. + * + * If @kqid has no mapping in @targ the kqid.type specific + * overflow identifier is returned. + */ +qid_t from_kqid_munged(struct user_namespace *targ, struct kqid kqid) +{ + switch (kqid.type) { + case USRQUOTA: + return from_kuid_munged(targ, kqid.uid); + case GRPQUOTA: + return from_kgid_munged(targ, kqid.gid); + case PRJQUOTA: + return from_kprojid_munged(targ, kqid.projid); + default: + BUG(); + } +} +EXPORT_SYMBOL(from_kqid_munged); + +/** + * qid_valid - Report if a valid value is stored in a kqid. + * @qid: The kernel internal quota identifier to test. + */ +bool qid_valid(struct kqid qid) +{ + switch (qid.type) { + case USRQUOTA: + return uid_valid(qid.uid); + case GRPQUOTA: + return gid_valid(qid.gid); + case PRJQUOTA: + return projid_valid(qid.projid); + default: + BUG(); + } +} +EXPORT_SYMBOL(qid_valid); diff --git a/include/linux/quota.h b/include/linux/quota.h index 524ede8a160a..00ac8d846c14 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -181,10 +181,135 @@ enum { #include #include +#include +#include + +#undef USRQUOTA +#undef GRPQUOTA +enum quota_type { + USRQUOTA = 0, /* element used for user quotas */ + GRPQUOTA = 1, /* element used for group quotas */ + PRJQUOTA = 2, /* element used for project quotas */ +}; typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ typedef long long qsize_t; /* Type in which we store sizes */ +struct kqid { /* Type in which we store the quota identifier */ + union { + kuid_t uid; + kgid_t gid; + kprojid_t projid; + }; + enum quota_type type; /* USRQUOTA (uid) or GRPQUOTA (gid) or PRJQUOTA (projid) */ +}; + +extern bool qid_eq(struct kqid left, struct kqid right); +extern bool qid_lt(struct kqid left, struct kqid right); +extern qid_t from_kqid(struct user_namespace *to, struct kqid qid); +extern qid_t from_kqid_munged(struct user_namespace *to, struct kqid qid); +extern bool qid_valid(struct kqid qid); + +/** + * make_kqid - Map a user-namespace, type, qid tuple into a kqid. + * @from: User namespace that the qid is in + * @type: The type of quota + * @qid: Quota identifier + * + * Maps a user-namespace, type qid tuple into a kernel internal + * kqid, and returns that kqid. + * + * When there is no mapping defined for the user-namespace, type, + * qid tuple an invalid kqid is returned. Callers are expected to + * test for and handle handle invalid kqids being returned. + * Invalid kqids may be tested for using qid_valid(). + */ +static inline struct kqid make_kqid(struct user_namespace *from, + enum quota_type type, qid_t qid) +{ + struct kqid kqid; + + kqid.type = type; + switch (type) { + case USRQUOTA: + kqid.uid = make_kuid(from, qid); + break; + case GRPQUOTA: + kqid.gid = make_kgid(from, qid); + break; + case PRJQUOTA: + kqid.projid = make_kprojid(from, qid); + break; + default: + BUG(); + } + return kqid; +} + +/** + * make_kqid_invalid - Explicitly make an invalid kqid + * @type: The type of quota identifier + * + * Returns an invalid kqid with the specified type. + */ +static inline struct kqid make_kqid_invalid(enum quota_type type) +{ + struct kqid kqid; + + kqid.type = type; + switch (type) { + case USRQUOTA: + kqid.uid = INVALID_UID; + break; + case GRPQUOTA: + kqid.gid = INVALID_GID; + break; + case PRJQUOTA: + kqid.projid = INVALID_PROJID; + break; + default: + BUG(); + } + return kqid; +} + +/** + * make_kqid_uid - Make a kqid from a kuid + * @uid: The kuid to make the quota identifier from + */ +static inline struct kqid make_kqid_uid(kuid_t uid) +{ + struct kqid kqid; + kqid.type = USRQUOTA; + kqid.uid = uid; + return kqid; +} + +/** + * make_kqid_gid - Make a kqid from a kgid + * @gid: The kgid to make the quota identifier from + */ +static inline struct kqid make_kqid_gid(kgid_t gid) +{ + struct kqid kqid; + kqid.type = GRPQUOTA; + kqid.gid = gid; + return kqid; +} + +/** + * make_kqid_projid - Make a kqid from a projid + * @projid: The kprojid to make the quota identifier from + */ +static inline struct kqid make_kqid_projid(kprojid_t projid) +{ + struct kqid kqid; + kqid.type = PRJQUOTA; + kqid.projid = projid; + return kqid; +} + + extern spinlock_t dq_data_lock; /* Maximal numbers of writes for quota operation (insert/delete/update) -- cgit v1.2.3 From 74a8a103789465c4e67f38d1abb5cea770002601 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Sep 2012 02:07:49 -0700 Subject: userns: Convert qutoactl Update the quotactl user space interface to successfull compile with user namespaces support enabled and to hand off quota identifiers to lower layers of the kernel in struct kqid instead of type and qid pairs. The quota on function is not converted because while it takes a quota type and an id. The id is the on disk quota format to use, which is something completely different. The signature of two struct quotactl_ops methods were changed to take struct kqid argumetns get_dqblk and set_dqblk. The dquot, xfs, and ocfs2 implementations of get_dqblk and set_dqblk are minimally changed so that the code continues to work with the change in parameter type. This is the first in a series of changes to always store quota identifiers in the kernel in struct kqid and only use raw type and qid values when interacting with on disk structures or userspace. Always using struct kqid internally makes it hard to miss places that need conversion to or from the kernel internal values. Cc: Jan Kara Cc: Dave Chinner Cc: Mark Fasheh Cc: Joel Becker Cc: Ben Myers Cc: Alex Elder Signed-off-by: "Eric W. Biederman" --- fs/gfs2/quota.c | 20 +++++++++++--------- fs/quota/dquot.c | 8 ++++---- fs/quota/quota.c | 28 ++++++++++++++++++++++------ fs/xfs/xfs_quotaops.c | 12 ++++++------ include/linux/quota.h | 4 ++-- include/linux/quotaops.h | 4 ++-- init/Kconfig | 2 +- 7 files changed, 48 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3bde91645c2..b3115392d68f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1469,7 +1469,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, return 0; } -static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, +static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, struct fs_disk_quota *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; @@ -1477,20 +1477,21 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, struct gfs2_quota_data *qd; struct gfs2_holder q_gh; int error; + int type; memset(fdq, 0, sizeof(struct fs_disk_quota)); if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return -ESRCH; /* Crazy XFS error code */ - if (type == USRQUOTA) + if (qid.type == USRQUOTA) type = QUOTA_USER; - else if (type == GRPQUOTA) + else if (qid.type == GRPQUOTA) type = QUOTA_GROUP; else return -EINVAL; - error = qd_get(sdp, type, id, &qd); + error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); if (error) return error; error = do_glock(qd, FORCE, &q_gh); @@ -1500,7 +1501,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; fdq->d_version = FS_DQUOT_VERSION; fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; - fdq->d_id = id; + fdq->d_id = from_kqid(&init_user_ns, qid); fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; @@ -1514,7 +1515,7 @@ out: /* GFS2 only supports a subset of the XFS fields */ #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) -static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, +static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, struct fs_disk_quota *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; @@ -1526,11 +1527,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, int alloc_required; loff_t offset; int error; + int type; if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return -ESRCH; /* Crazy XFS error code */ - switch(type) { + switch(qid.type) { case USRQUOTA: type = QUOTA_USER; if (fdq->d_flags != FS_USER_QUOTA) @@ -1547,10 +1549,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, if (fdq->d_fieldmask & ~GFS2_FIELDMASK) return -EINVAL; - if (fdq->d_id != id) + if (fdq->d_id != from_kqid(&init_user_ns, qid)) return -EINVAL; - error = qd_get(sdp, type, id, &qd); + error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); if (error) return error; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 36a29b753c79..7714b169d646 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2376,12 +2376,12 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) spin_unlock(&dq_data_lock); } -int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, +int dquot_get_dqblk(struct super_block *sb, struct kqid qid, struct fs_disk_quota *di) { struct dquot *dquot; - dquot = dqget(sb, id, type); + dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid)); if (!dquot) return -ESRCH; do_get_dqblk(dquot, di); @@ -2488,13 +2488,13 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) return 0; } -int dquot_set_dqblk(struct super_block *sb, int type, qid_t id, +int dquot_set_dqblk(struct super_block *sb, struct kqid qid, struct fs_disk_quota *di) { struct dquot *dquot; int rc; - dquot = dqget(sb, id, type); + dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid)); if (!dquot) { rc = -ESRCH; goto out; diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 6f155788cbc6..ff0135d6bc51 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -32,8 +32,8 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, /* allow to query information for dquots we "own" */ case Q_GETQUOTA: case Q_XGETQUOTA: - if ((type == USRQUOTA && current_euid() == id) || - (type == GRPQUOTA && in_egroup_p(id))) + if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) || + (type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id)))) break; /*FALLTHROUGH*/ default: @@ -130,13 +130,17 @@ static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { + struct kqid qid; struct fs_disk_quota fdq; struct if_dqblk idq; int ret; if (!sb->s_qcop->get_dqblk) return -ENOSYS; - ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); if (ret) return ret; copy_to_if_dqblk(&idq, &fdq); @@ -176,13 +180,17 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id, { struct fs_disk_quota fdq; struct if_dqblk idq; + struct kqid qid; if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; if (!sb->s_qcop->set_dqblk) return -ENOSYS; + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; copy_from_if_dqblk(&fdq, &idq); - return sb->s_qcop->set_dqblk(sb, type, id, &fdq); + return sb->s_qcop->set_dqblk(sb, qid, &fdq); } static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) @@ -213,23 +221,31 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; + struct kqid qid; if (copy_from_user(&fdq, addr, sizeof(fdq))) return -EFAULT; if (!sb->s_qcop->set_dqblk) return -ENOSYS; - return sb->s_qcop->set_dqblk(sb, type, id, &fdq); + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + return sb->s_qcop->set_dqblk(sb, qid, &fdq); } static int quota_getxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; + struct kqid qid; int ret; if (!sb->s_qcop->get_dqblk) return -ENOSYS; - ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index fed504fc2999..71926d630527 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -97,8 +97,7 @@ xfs_fs_set_xstate( STATIC int xfs_fs_get_dqblk( struct super_block *sb, - int type, - qid_t id, + struct kqid qid, struct fs_disk_quota *fdq) { struct xfs_mount *mp = XFS_M(sb); @@ -108,14 +107,14 @@ xfs_fs_get_dqblk( if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); + return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), + xfs_quota_type(qid.type), fdq); } STATIC int xfs_fs_set_dqblk( struct super_block *sb, - int type, - qid_t id, + struct kqid qid, struct fs_disk_quota *fdq) { struct xfs_mount *mp = XFS_M(sb); @@ -127,7 +126,8 @@ xfs_fs_set_dqblk( if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); + return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), + xfs_quota_type(qid.type), fdq); } const struct quotactl_ops xfs_quotactl_operations = { diff --git a/include/linux/quota.h b/include/linux/quota.h index 00ac8d846c14..f96427a949b2 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -461,8 +461,8 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); - int (*set_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); + int (*get_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); + int (*set_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); }; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index ec6b65feaaba..bd3730d76fd9 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -87,9 +87,9 @@ int dquot_writeback_dquots(struct super_block *sb, int type); int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, +int dquot_get_dqblk(struct super_block *sb, struct kqid id, struct fs_disk_quota *di); -int dquot_set_dqblk(struct super_block *sb, int type, qid_t id, +int dquot_set_dqblk(struct super_block *sb, struct kqid id, struct fs_disk_quota *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); diff --git a/init/Kconfig b/init/Kconfig index 33d231cd3cc6..15bb1dcdebef 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -928,7 +928,7 @@ config UIDGID_CONVERTED depends on IMA = n depends on EVM = n depends on QUOTA = n - depends on QUOTACTL = n + depends on QUOTA_NETLINK_INTERFACE = n # Networking depends on NET_9P = n -- cgit v1.2.3 From 431f19744d15531825cdbc8e771b43854b0d005b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Sep 2012 02:32:43 -0700 Subject: userns: Convert quota netlink aka quota_send_warning Modify quota_send_warning to take struct kqid instead a type and identifier pair. When sending netlink broadcasts always convert uids and quota identifiers into the intial user namespace. There is as yet no way to send a netlink broadcast message with different contents to receivers in different namespaces, so for the time being just map all of the identifiers into the initial user namespace which preserves the current behavior. Change the callers of quota_send_warning in gfs2, xfs and dquot to generate a struct kqid to pass to quota send warning. When all of the user namespaces convesions are complete a struct kqid values will be availbe without need for conversion, but a conversion is needed now to avoid needing to convert everything at once. Cc: Ben Myers Cc: Alex Elder Cc: Dave Chinner Cc: Jan Kara Cc: Steven Whitehouse Signed-off-by: "Eric W. Biederman" --- fs/gfs2/quota.c | 12 ++++++++---- fs/quota/dquot.c | 2 +- fs/quota/netlink.c | 10 ++++++---- fs/xfs/xfs_trans_dquot.c | 8 +++++--- include/linux/quota.h | 4 ++-- init/Kconfig | 1 - 6 files changed, 22 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b3115392d68f..d554dfff58e3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1070,8 +1070,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); - quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? - USRQUOTA : GRPQUOTA, qd->qd_id, + quota_send_warning(make_kqid(&init_user_ns, + test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, + qd->qd_id), sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); error = -EDQUOT; @@ -1081,8 +1083,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) time_after_eq(jiffies, qd->qd_last_warn + gfs2_tune_get(sdp, gt_quota_warn_period) * HZ)) { - quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? - USRQUOTA : GRPQUOTA, qd->qd_id, + quota_send_warning(make_kqid(&init_user_ns, + test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, + qd->qd_id), sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 7714b169d646..80d337822462 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1236,7 +1236,7 @@ static void flush_warnings(struct dquot_warn *warn) #ifdef CONFIG_PRINT_QUOTA_WARNING print_warning(&warn[i]); #endif - quota_send_warning(warn[i].w_dq_type, warn[i].w_dq_id, + quota_send_warning(make_kqid(&init_user_ns, warn[i].w_dq_type, warn[i].w_dq_id), warn[i].w_sb->s_dev, warn[i].w_type); } } diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index d67908b407d9..16e8abb7709b 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -30,7 +30,7 @@ static struct genl_family quota_genl_family = { * */ -void quota_send_warning(short type, unsigned int id, dev_t dev, +void quota_send_warning(struct kqid qid, dev_t dev, const char warntype) { static atomic_t seq; @@ -56,10 +56,11 @@ void quota_send_warning(short type, unsigned int id, dev_t dev, "VFS: Cannot store netlink header in quota warning.\n"); goto err_out; } - ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); + ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, qid.type); if (ret) goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); + ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, + from_kqid_munged(&init_user_ns, qid)); if (ret) goto attr_err_out; ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); @@ -71,7 +72,8 @@ void quota_send_warning(short type, unsigned int id, dev_t dev, ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); if (ret) goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, + from_kuid_munged(&init_user_ns, current_uid())); if (ret) goto attr_err_out; genlmsg_end(skb, msg_head); diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index bcb60542fcf1..0c7fa54f309e 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -578,9 +578,11 @@ xfs_quota_warn( /* no warnings for project quotas - we just return ENOSPC later */ if (dqp->dq_flags & XFS_DQ_PROJ) return; - quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, - be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, - type); + quota_send_warning(make_kqid(&init_user_ns, + (dqp->dq_flags & XFS_DQ_USER) ? + USRQUOTA : GRPQUOTA, + be32_to_cpu(dqp->q_core.d_id)), + mp->m_super->s_dev, type); } /* diff --git a/include/linux/quota.h b/include/linux/quota.h index f96427a949b2..8b2760427252 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -511,10 +511,10 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type) } #ifdef CONFIG_QUOTA_NETLINK_INTERFACE -extern void quota_send_warning(short type, unsigned int id, dev_t dev, +extern void quota_send_warning(struct kqid qid, dev_t dev, const char warntype); #else -static inline void quota_send_warning(short type, unsigned int id, dev_t dev, +static inline void quota_send_warning(struct kqid qid, dev_t dev, const char warntype) { return; diff --git a/init/Kconfig b/init/Kconfig index 15bb1dcdebef..9c8aa8c49443 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -928,7 +928,6 @@ config UIDGID_CONVERTED depends on IMA = n depends on EVM = n depends on QUOTA = n - depends on QUOTA_NETLINK_INTERFACE = n # Networking depends on NET_9P = n -- cgit v1.2.3 From aca645a6a54e001e004f1f1e0eafd94f994bb1b3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Sep 2012 03:11:50 -0700 Subject: userns: Modify dqget to take struct kqid Modify dqget to take struct kqid instead of a type and an identifier pair. Modify the callers of dqget in ocfs2 and dquot to take generate a struct kqid so they can continue to call dqget. The conversion to create struct kqid should all be the final conversions that are needed in those code paths. Cc: Jan Kara Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: "Eric W. Biederman" --- fs/ocfs2/file.c | 6 ++---- fs/ocfs2/quota_local.c | 4 +++- fs/quota/dquot.c | 20 +++++++++++--------- include/linux/quotaops.h | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 46a1f6d75104..5a4ee77cec51 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1184,8 +1184,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { - transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, - USRQUOTA); + transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); if (!transfer_to[USRQUOTA]) { status = -ESRCH; goto bail_unlock; @@ -1194,8 +1193,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { - transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, - GRPQUOTA); + transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); if (!transfer_to[GRPQUOTA]) { status = -ESRCH; goto bail_unlock; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index f100bf70a906..020f0ba29ee5 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -501,7 +501,9 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, } dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data + ol_dqblk_block_off(sb, chunk, bit)); - dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type); + dquot = dqget(sb, + make_kqid(&init_user_ns, type, + le64_to_cpu(dqblk->dqb_id))); if (!dquot) { status = -EIO; mlog(ML_ERROR, "Failed to get quota structure " diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 80d337822462..53e377a59b05 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -829,8 +829,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) * a) checking for quota flags under dq_list_lock and * b) getting a reference to dquot before we release dq_list_lock */ -struct dquot *dqget(struct super_block *sb, unsigned int id, int type) +struct dquot *dqget(struct super_block *sb, struct kqid qid) { + unsigned int type = qid.type; + unsigned int id = from_kqid(&init_user_ns, qid); unsigned int hashent = hashfn(sb, id, type); struct dquot *dquot = NULL, *empty = NULL; @@ -1390,7 +1392,6 @@ static int dquot_active(const struct inode *inode) */ static void __dquot_initialize(struct inode *inode, int type) { - unsigned int id = 0; int cnt; struct dquot *got[MAXQUOTAS]; struct super_block *sb = inode->i_sb; @@ -1403,18 +1404,19 @@ static void __dquot_initialize(struct inode *inode, int type) /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct kqid qid; got[cnt] = NULL; if (type != -1 && cnt != type) continue; switch (cnt) { case USRQUOTA: - id = inode->i_uid; + qid = make_kqid_uid(inode->i_uid); break; case GRPQUOTA: - id = inode->i_gid; + qid = make_kqid_gid(inode->i_gid); break; } - got[cnt] = dqget(sb, id, cnt); + got[cnt] = dqget(sb, qid); } down_write(&sb_dqopt(sb)->dqptr_sem); @@ -1898,9 +1900,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) return 0; if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) - transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); + transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(iattr->ia_uid)); if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) - transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA); + transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid)); ret = __dquot_transfer(inode, transfer_to); dqput_all(transfer_to); @@ -2381,7 +2383,7 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid, { struct dquot *dquot; - dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid)); + dquot = dqget(sb, qid); if (!dquot) return -ESRCH; do_get_dqblk(dquot, di); @@ -2494,7 +2496,7 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid qid, struct dquot *dquot; int rc; - dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid)); + dquot = dqget(sb, qid); if (!dquot) { rc = -ESRCH; goto out; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index bd3730d76fd9..1c50093ae656 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -44,7 +44,7 @@ void inode_sub_rsv_space(struct inode *inode, qsize_t number); void dquot_initialize(struct inode *inode); void dquot_drop(struct inode *inode); -struct dquot *dqget(struct super_block *sb, unsigned int id, int type); +struct dquot *dqget(struct super_block *sb, struct kqid qid); void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), -- cgit v1.2.3 From 4c376dcae892e5b5daf8576c864061d076d4e4dc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Sep 2012 03:56:19 -0700 Subject: userns: Convert struct dquot dq_id to be a struct kqid Change struct dquot dq_id to a struct kqid and remove the now unecessary dq_type. Make minimal changes to dquot, quota_tree, quota_v1, quota_v2, ext3, ext4, and ocfs2 to deal with the change in quota structures and signatures. The ocfs2 changes are larger than most because of the extensive tracing throughout the ocfs2 quota code that prints out dq_id. quota_tree.c:get_index is modified to take a struct kqid instead of a qid_t because all of it's callers pass in dquot->dq_id and it allows me to introduce only a single conversion. The rest of the changes are either just replacing dq_type with dq_id.type, adding conversions to deal with the change in type and occassionally adding qid_eq to allow quota id comparisons in a user namespace safe way. Cc: Mark Fasheh Cc: Joel Becker Cc: Jan Kara Cc: Andrew Morton Cc: Andreas Dilger Cc: Theodore Tso Signed-off-by: "Eric W. Biederman" --- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/ocfs2/quota_global.c | 43 ++++++++++++++++++++------------- fs/ocfs2/quota_local.c | 11 +++++---- fs/quota/dquot.c | 63 +++++++++++++++++++++++++------------------------ fs/quota/quota_tree.c | 22 ++++++++++------- fs/quota/quota_v1.c | 12 ++++++---- fs/quota/quota_v2.c | 26 +++++++++++--------- include/linux/quota.h | 3 +-- 9 files changed, 102 insertions(+), 82 deletions(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index ff9bcdc5b0d5..73e42f5c7009 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2814,7 +2814,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) static inline struct inode *dquot_to_inode(struct dquot *dquot) { - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; } static int ext3_write_dquot(struct dquot *dquot) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d76ec8277d3f..78e6036ff244 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4796,7 +4796,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) static inline struct inode *dquot_to_inode(struct dquot *dquot) { - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; } static int ext4_write_dquot(struct dquot *dquot) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 0a86e302655f..332a281f217e 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -95,7 +95,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot) struct ocfs2_global_disk_dqblk *d = dp; struct mem_dqblk *m = &dquot->dq_dqb; - d->dqb_id = cpu_to_le32(dquot->dq_id); + d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id)); d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count); d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); @@ -112,11 +112,14 @@ static int ocfs2_global_is_id(void *dp, struct dquot *dquot) { struct ocfs2_global_disk_dqblk *d = dp; struct ocfs2_mem_dqinfo *oinfo = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; if (qtree_entry_unused(&oinfo->dqi_gi, dp)) return 0; - return le32_to_cpu(d->dqb_id) == dquot->dq_id; + + return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type, + le32_to_cpu(d->dqb_id)), + dquot->dq_id); } struct qtree_fmt_operations ocfs2_global_ops = { @@ -475,7 +478,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) { int err, err2; struct super_block *sb = dquot->dq_sb; - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; struct ocfs2_global_disk_dqblk dqblk; s64 spacechange, inodechange; @@ -504,7 +507,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) olditime = dquot->dq_dqb.dqb_itime; oldbtime = dquot->dq_dqb.dqb_btime; ocfs2_global_disk2memdqb(dquot, &dqblk); - trace_ocfs2_sync_dquot(dquot->dq_id, dquot->dq_dqb.dqb_curspace, + trace_ocfs2_sync_dquot(from_kqid(&init_user_ns, dquot->dq_id), + dquot->dq_dqb.dqb_curspace, (long long)spacechange, dquot->dq_dqb.dqb_curinodes, (long long)inodechange); @@ -555,8 +559,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) err = ocfs2_qinfo_lock(info, freeing); if (err < 0) { mlog(ML_ERROR, "Failed to lock quota info, losing quota write" - " (type=%d, id=%u)\n", dquot->dq_type, - (unsigned)dquot->dq_id); + " (type=%d, id=%u)\n", dquot->dq_id.type, + (unsigned)from_kqid(&init_user_ns, dquot->dq_id)); goto out; } if (freeing) @@ -591,9 +595,10 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) struct ocfs2_super *osb = OCFS2_SB(sb); int status = 0; - trace_ocfs2_sync_dquot_helper(dquot->dq_id, dquot->dq_type, + trace_ocfs2_sync_dquot_helper(from_kqid(&init_user_ns, dquot->dq_id), + dquot->dq_id.type, type, sb->s_id); - if (type != dquot->dq_type) + if (type != dquot->dq_id.type) goto out; status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) @@ -643,7 +648,8 @@ static int ocfs2_write_dquot(struct dquot *dquot) struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); int status = 0; - trace_ocfs2_write_dquot(dquot->dq_id, dquot->dq_type); + trace_ocfs2_write_dquot(from_kqid(&init_user_ns, dquot->dq_id), + dquot->dq_id.type); handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS); if (IS_ERR(handle)) { @@ -677,11 +683,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) { handle_t *handle; struct ocfs2_mem_dqinfo *oinfo = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); int status = 0; - trace_ocfs2_release_dquot(dquot->dq_id, dquot->dq_type); + trace_ocfs2_release_dquot(from_kqid(&init_user_ns, dquot->dq_id), + dquot->dq_id.type); mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ @@ -691,7 +698,7 @@ static int ocfs2_release_dquot(struct dquot *dquot) if (status < 0) goto out; handle = ocfs2_start_trans(osb, - ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type)); + ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -733,13 +740,14 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) int ex = 0; struct super_block *sb = dquot->dq_sb; struct ocfs2_super *osb = OCFS2_SB(sb); - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; struct inode *gqinode = info->dqi_gqinode; int need_alloc = ocfs2_global_qinit_alloc(sb, type); handle_t *handle; - trace_ocfs2_acquire_dquot(dquot->dq_id, type); + trace_ocfs2_acquire_dquot(from_kqid(&init_user_ns, dquot->dq_id), + type); mutex_lock(&dquot->dq_lock); /* * We need an exclusive lock, because we're going to update use count @@ -821,12 +829,13 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) int sync = 0; int status; struct super_block *sb = dquot->dq_sb; - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(sb); - trace_ocfs2_mark_dquot_dirty(dquot->dq_id, type); + trace_ocfs2_mark_dquot_dirty(from_kqid(&init_user_ns, dquot->dq_id), + type); /* In case user set some limits, sync dquot immediately to global * quota file so that information propagates quicker */ diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 020f0ba29ee5..27fe7ee4874c 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -883,7 +883,8 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data + ol_dqblk_block_offset(sb, od->dq_local_off)); - dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id); + dqblk->dqb_id = cpu_to_le64(from_kqid(&init_user_ns, + od->dq_dquot.dq_id)); spin_lock(&dq_data_lock); dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - od->dq_origspace); @@ -893,7 +894,7 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) trace_olq_set_dquot( (unsigned long long)le64_to_cpu(dqblk->dqb_spacemod), (unsigned long long)le64_to_cpu(dqblk->dqb_inodemod), - od->dq_dquot.dq_id); + from_kqid(&init_user_ns, od->dq_dquot.dq_id)); } /* Write dquot to local quota file */ @@ -902,7 +903,7 @@ int ocfs2_local_write_dquot(struct dquot *dquot) struct super_block *sb = dquot->dq_sb; struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); struct buffer_head *bh; - struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type]; + struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_id.type]; int status; status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk, @@ -1223,7 +1224,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private) int ocfs2_create_local_dquot(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct inode *lqinode = sb_dqopt(sb)->files[type]; struct ocfs2_quota_chunk *chunk; struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); @@ -1277,7 +1278,7 @@ out: int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) { int status; - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); struct super_block *sb = dquot->dq_sb; struct ocfs2_local_disk_chunk *dchunk; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 53e377a59b05..efaeed35476f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -267,7 +267,7 @@ hashfn(const struct super_block *sb, unsigned int id, int type) static inline void insert_dquot_hash(struct dquot *dquot) { struct hlist_head *head; - head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type); + head = dquot_hash + hashfn(dquot->dq_sb, from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type); hlist_add_head(&dquot->dq_hash, head); } @@ -279,13 +279,13 @@ static inline void remove_dquot_hash(struct dquot *dquot) static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, unsigned int id, int type) { + struct kqid qid = make_kqid(&init_user_ns, type, id); struct hlist_node *node; struct dquot *dquot; hlist_for_each (node, dquot_hash+hashent) { dquot = hlist_entry(node, struct dquot, dq_hash); - if (dquot->dq_sb == sb && dquot->dq_id == id && - dquot->dq_type == type) + if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid)) return dquot; } return NULL; @@ -351,7 +351,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) spin_lock(&dq_list_lock); if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> - info[dquot->dq_type].dqi_dirty_list); + info[dquot->dq_id.type].dqi_dirty_list); ret = 0; } spin_unlock(&dq_list_lock); @@ -410,17 +410,17 @@ int dquot_acquire(struct dquot *dquot) mutex_lock(&dquot->dq_lock); mutex_lock(&dqopt->dqio_mutex); if (!test_bit(DQ_READ_B, &dquot->dq_flags)) - ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); + ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); if (ret < 0) goto out_iolock; set_bit(DQ_READ_B, &dquot->dq_flags); /* Instantiate dquot if needed */ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) { - ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); + ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); /* Write the info if needed */ - if (info_dirty(&dqopt->info[dquot->dq_type])) { - ret2 = dqopt->ops[dquot->dq_type]->write_file_info( - dquot->dq_sb, dquot->dq_type); + if (info_dirty(&dqopt->info[dquot->dq_id.type])) { + ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( + dquot->dq_sb, dquot->dq_id.type); } if (ret < 0) goto out_iolock; @@ -455,7 +455,7 @@ int dquot_commit(struct dquot *dquot) /* Inactive dquot can be only if there was error during read/init * => we have better not writing it */ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) - ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); + ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); else ret = -EIO; out_sem: @@ -477,12 +477,12 @@ int dquot_release(struct dquot *dquot) if (atomic_read(&dquot->dq_count) > 1) goto out_dqlock; mutex_lock(&dqopt->dqio_mutex); - if (dqopt->ops[dquot->dq_type]->release_dqblk) { - ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); + if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { + ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); /* Write the info */ - if (info_dirty(&dqopt->info[dquot->dq_type])) { - ret2 = dqopt->ops[dquot->dq_type]->write_file_info( - dquot->dq_sb, dquot->dq_type); + if (info_dirty(&dqopt->info[dquot->dq_id.type])) { + ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( + dquot->dq_sb, dquot->dq_id.type); } if (ret >= 0) ret = ret2; @@ -521,7 +521,7 @@ restart: list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { if (dquot->dq_sb != sb) continue; - if (dquot->dq_type != type) + if (dquot->dq_id.type != type) continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { @@ -741,7 +741,8 @@ void dqput(struct dquot *dquot) #ifdef CONFIG_QUOTA_DEBUG if (!atomic_read(&dquot->dq_count)) { quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", - quotatypes[dquot->dq_type], dquot->dq_id); + quotatypes[dquot->dq_id.type], + from_kqid(&init_user_ns, dquot->dq_id)); BUG(); } #endif @@ -752,7 +753,7 @@ we_slept: /* We have more than one user... nothing to do */ atomic_dec(&dquot->dq_count); /* Releasing dquot during quotaoff phase? */ - if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) && + if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) && atomic_read(&dquot->dq_count) == 1) wake_up(&dquot->dq_wait_unused); spin_unlock(&dq_list_lock); @@ -815,7 +816,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) INIT_LIST_HEAD(&dquot->dq_dirty); init_waitqueue_head(&dquot->dq_wait_unused); dquot->dq_sb = sb; - dquot->dq_type = type; + dquot->dq_id.type = type; atomic_set(&dquot->dq_count, 1); return dquot; @@ -859,7 +860,7 @@ we_slept: } dquot = empty; empty = NULL; - dquot->dq_id = id; + dquot->dq_id = qid; /* all dquots go on the inuse_list */ put_inuse(dquot); /* hash it first so it can be found */ @@ -1219,8 +1220,8 @@ static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot, return; warn->w_type = warntype; warn->w_sb = dquot->dq_sb; - warn->w_dq_id = dquot->dq_id; - warn->w_dq_type = dquot->dq_type; + warn->w_dq_id = from_kqid(&init_user_ns, dquot->dq_id); + warn->w_dq_type = dquot->dq_id.type; } /* @@ -1245,7 +1246,7 @@ static void flush_warnings(struct dquot_warn *warn) static int ignore_hardlimit(struct dquot *dquot) { - struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; + struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; return capable(CAP_SYS_RESOURCE) && (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || @@ -1258,7 +1259,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, { qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; - if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || + if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) return 0; @@ -1283,7 +1284,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, dquot->dq_dqb.dqb_itime == 0) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); dquot->dq_dqb.dqb_itime = get_seconds() + - sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; + sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace; } return 0; @@ -1296,7 +1297,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, qsize_t tspace; struct super_block *sb = dquot->dq_sb; - if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || + if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) return 0; @@ -1327,7 +1328,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, if (!prealloc) { prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); dquot->dq_dqb.dqb_btime = get_seconds() + - sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace; + sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace; } else /* @@ -1346,7 +1347,7 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes) if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || - !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type)) + !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type)) return QUOTA_NL_NOWARN; newinodes = dquot->dq_dqb.dqb_curinodes - inodes; @@ -2362,9 +2363,9 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) memset(di, 0, sizeof(*di)); di->d_version = FS_DQUOT_VERSION; - di->d_flags = dquot->dq_type == USRQUOTA ? + di->d_flags = dquot->dq_id.type == USRQUOTA ? FS_USER_QUOTA : FS_GROUP_QUOTA; - di->d_id = dquot->dq_id; + di->d_id = from_kqid_munged(current_user_ns(), dquot->dq_id); spin_lock(&dq_data_lock); di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); @@ -2403,7 +2404,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; - struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; + struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; if (di->d_fieldmask & ~VFS_FS_DQ_MASK) return -EINVAL; diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index e41c1becf096..d65877fbe8f4 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -22,9 +22,10 @@ MODULE_LICENSE("GPL"); #define __QUOTA_QT_PARANOIA -static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth) +static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth) { unsigned int epb = info->dqi_usable_bs >> 2; + qid_t id = from_kqid(&init_user_ns, qid); depth = info->dqi_qtree_depth - depth - 1; while (depth--) @@ -244,7 +245,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, /* This is enough as the block is already zeroed and the entry * list is empty... */ info->dqi_free_entry = blk; - mark_info_dirty(dquot->dq_sb, dquot->dq_type); + mark_info_dirty(dquot->dq_sb, dquot->dq_id.type); } /* Block will be full? */ if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { @@ -357,7 +358,7 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info, */ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) { - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct super_block *sb = dquot->dq_sb; ssize_t ret; char *ddquot = getdqbuf(info->dqi_entry_size); @@ -538,8 +539,9 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, ddquot += info->dqi_entry_size; } if (i == qtree_dqstr_in_blk(info)) { - quota_error(dquot->dq_sb, "Quota for id %u referenced " - "but not present", dquot->dq_id); + quota_error(dquot->dq_sb, + "Quota for id %u referenced but not present", + from_kqid(&init_user_ns, dquot->dq_id)); ret = -EIO; goto out_buf; } else { @@ -589,7 +591,7 @@ static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info, int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) { - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct super_block *sb = dquot->dq_sb; loff_t offset; char *ddquot; @@ -607,8 +609,10 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) offset = find_dqentry(info, dquot); if (offset <= 0) { /* Entry not present? */ if (offset < 0) - quota_error(sb, "Can't read quota structure " - "for id %u", dquot->dq_id); + quota_error(sb,"Can't read quota structure " + "for id %u", + from_kqid(&init_user_ns, + dquot->dq_id)); dquot->dq_off = 0; set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); @@ -626,7 +630,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) if (ret >= 0) ret = -EIO; quota_error(sb, "Error while reading quota structure for id %u", - dquot->dq_id); + from_kqid(&init_user_ns, dquot->dq_id)); set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); kfree(ddquot); diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 34b37a67bb16..469c6848b322 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c @@ -54,7 +54,7 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m) static int v1_read_dqblk(struct dquot *dquot) { - int type = dquot->dq_type; + int type = dquot->dq_id.type; struct v1_disk_dqblk dqblk; if (!sb_dqopt(dquot->dq_sb)->files[type]) @@ -63,7 +63,8 @@ static int v1_read_dqblk(struct dquot *dquot) /* Set structure to 0s in case read fails/is after end of file */ memset(&dqblk, 0, sizeof(struct v1_disk_dqblk)); dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, - sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); + sizeof(struct v1_disk_dqblk), + v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id))); v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); if (dquot->dq_dqb.dqb_bhardlimit == 0 && @@ -78,12 +79,13 @@ static int v1_read_dqblk(struct dquot *dquot) static int v1_commit_dqblk(struct dquot *dquot) { - short type = dquot->dq_type; + short type = dquot->dq_id.type; ssize_t ret; struct v1_disk_dqblk dqblk; v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); - if (dquot->dq_id == 0) { + if (((type == USRQUOTA) && uid_eq(dquot->dq_id.uid, GLOBAL_ROOT_UID)) || + ((type == GRPQUOTA) && gid_eq(dquot->dq_id.gid, GLOBAL_ROOT_GID))) { dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; dqblk.dqb_itime = @@ -93,7 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot) if (sb_dqopt(dquot->dq_sb)->files[type]) ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), - v1_dqoff(dquot->dq_id)); + v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id))); if (ret != sizeof(struct v1_disk_dqblk)) { quota_error(dquot->dq_sb, "dquota write failed"); if (ret >= 0) diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index f1ab3604db5a..02751ec695c5 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -196,7 +196,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot) struct v2r0_disk_dqblk *d = dp; struct mem_dqblk *m = &dquot->dq_dqb; struct qtree_mem_dqinfo *info = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); @@ -206,7 +206,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot) d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit)); d->dqb_curspace = cpu_to_le64(m->dqb_curspace); d->dqb_btime = cpu_to_le64(m->dqb_btime); - d->dqb_id = cpu_to_le32(dquot->dq_id); + d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id)); if (qtree_entry_unused(info, dp)) d->dqb_itime = cpu_to_le64(1); } @@ -215,11 +215,13 @@ static int v2r0_is_id(void *dp, struct dquot *dquot) { struct v2r0_disk_dqblk *d = dp; struct qtree_mem_dqinfo *info = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; if (qtree_entry_unused(info, dp)) return 0; - return le32_to_cpu(d->dqb_id) == dquot->dq_id; + return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type, + le32_to_cpu(d->dqb_id)), + dquot->dq_id); } static void v2r1_disk2memdqb(struct dquot *dquot, void *dp) @@ -247,7 +249,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot) struct v2r1_disk_dqblk *d = dp; struct mem_dqblk *m = &dquot->dq_dqb; struct qtree_mem_dqinfo *info = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); @@ -257,7 +259,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot) d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit)); d->dqb_curspace = cpu_to_le64(m->dqb_curspace); d->dqb_btime = cpu_to_le64(m->dqb_btime); - d->dqb_id = cpu_to_le32(dquot->dq_id); + d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id)); if (qtree_entry_unused(info, dp)) d->dqb_itime = cpu_to_le64(1); } @@ -266,26 +268,28 @@ static int v2r1_is_id(void *dp, struct dquot *dquot) { struct v2r1_disk_dqblk *d = dp; struct qtree_mem_dqinfo *info = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; if (qtree_entry_unused(info, dp)) return 0; - return le32_to_cpu(d->dqb_id) == dquot->dq_id; + return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type, + le32_to_cpu(d->dqb_id)), + dquot->dq_id); } static int v2_read_dquot(struct dquot *dquot) { - return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); + return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot); } static int v2_write_dquot(struct dquot *dquot) { - return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); + return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot); } static int v2_release_dquot(struct dquot *dquot) { - return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); + return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot); } static int v2_free_file_info(struct super_block *sb, int type) diff --git a/include/linux/quota.h b/include/linux/quota.h index 8b2760427252..dcd5721e626d 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -419,10 +419,9 @@ struct dquot { atomic_t dq_count; /* Use count */ wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */ struct super_block *dq_sb; /* superblock this applies to */ - unsigned int dq_id; /* ID this applies to (uid, gid) */ + struct kqid dq_id; /* ID this applies to (uid, gid, projid) */ loff_t dq_off; /* Offset of dquot on disk */ unsigned long dq_flags; /* See DQ_* */ - short dq_type; /* Type of quota */ struct mem_dqblk dq_dqb; /* Diskquota usage */ }; -- cgit v1.2.3 From 7b9c7321ca6b13d2baeb82b3c9c9f78e9885bcf5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Sep 2012 04:05:34 -0700 Subject: userns: Convert struct dquot_warn Convert w_dq_id to be a struct kquid and remove the now unncessary w_dq_type. This is a simple conversion and enough other places have already been converted that this actually reduces the code complexity by a little bit, when removing now unnecessary type conversions. Cc: Jan Kara Signed-off-by: "Eric W. Biederman" --- fs/quota/dquot.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index efaeed35476f..f3db8456dc8a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1132,8 +1132,7 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number) struct dquot_warn { struct super_block *w_sb; - qid_t w_dq_id; - short w_dq_type; + struct kqid w_dq_id; short w_type; }; @@ -1157,11 +1156,11 @@ static int need_print_warning(struct dquot_warn *warn) if (!flag_print_warnings) return 0; - switch (warn->w_dq_type) { + switch (warn->w_dq_id.type) { case USRQUOTA: - return current_fsuid() == warn->w_dq_id; + return current_fsuid() == warn->w_dq_id.uid; case GRPQUOTA: - return in_group_p(warn->w_dq_id); + return in_group_p(warn->w_dq_id.gid); } return 0; } @@ -1187,7 +1186,7 @@ static void print_warning(struct dquot_warn *warn) tty_write_message(tty, ": warning, "); else tty_write_message(tty, ": write failed, "); - tty_write_message(tty, quotatypes[warn->w_dq_type]); + tty_write_message(tty, quotatypes[warn->w_dq_id.type]); switch (warntype) { case QUOTA_NL_IHARDWARN: msg = " file limit reached.\r\n"; @@ -1220,8 +1219,7 @@ static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot, return; warn->w_type = warntype; warn->w_sb = dquot->dq_sb; - warn->w_dq_id = from_kqid(&init_user_ns, dquot->dq_id); - warn->w_dq_type = dquot->dq_id.type; + warn->w_dq_id = dquot->dq_id; } /* @@ -1239,7 +1237,7 @@ static void flush_warnings(struct dquot_warn *warn) #ifdef CONFIG_PRINT_QUOTA_WARNING print_warning(&warn[i]); #endif - quota_send_warning(make_kqid(&init_user_ns, warn[i].w_dq_type, warn[i].w_dq_id), + quota_send_warning(warn[i].w_dq_id, warn[i].w_sb->s_dev, warn[i].w_type); } } -- cgit v1.2.3 From 1a06d420ce9d60b98f5bdf5fd6e4200abfbd3c35 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Sep 2012 05:45:30 -0700 Subject: userns: Convert quota Now that the type changes are done, here is the final set of changes to make the quota code work when user namespaces are enabled. Small cleanups and fixes to make the code build when user namespaces are enabled. Cc: Jan Kara Signed-off-by: "Eric W. Biederman" --- fs/quota/dquot.c | 29 ++++++++++++++--------------- init/Kconfig | 1 - 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f3db8456dc8a..c4564d0a4a9b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -253,8 +253,10 @@ static qsize_t inode_get_rsv_space(struct inode *inode); static void __dquot_initialize(struct inode *inode, int type); static inline unsigned int -hashfn(const struct super_block *sb, unsigned int id, int type) +hashfn(const struct super_block *sb, struct kqid qid) { + unsigned int id = from_kqid(&init_user_ns, qid); + int type = qid.type; unsigned long tmp; tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); @@ -267,7 +269,7 @@ hashfn(const struct super_block *sb, unsigned int id, int type) static inline void insert_dquot_hash(struct dquot *dquot) { struct hlist_head *head; - head = dquot_hash + hashfn(dquot->dq_sb, from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type); + head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id); hlist_add_head(&dquot->dq_hash, head); } @@ -277,9 +279,8 @@ static inline void remove_dquot_hash(struct dquot *dquot) } static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, - unsigned int id, int type) + struct kqid qid) { - struct kqid qid = make_kqid(&init_user_ns, type, id); struct hlist_node *node; struct dquot *dquot; @@ -816,7 +817,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) INIT_LIST_HEAD(&dquot->dq_dirty); init_waitqueue_head(&dquot->dq_wait_unused); dquot->dq_sb = sb; - dquot->dq_id.type = type; + dquot->dq_id = make_kqid_invalid(type); atomic_set(&dquot->dq_count, 1); return dquot; @@ -832,28 +833,26 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) */ struct dquot *dqget(struct super_block *sb, struct kqid qid) { - unsigned int type = qid.type; - unsigned int id = from_kqid(&init_user_ns, qid); - unsigned int hashent = hashfn(sb, id, type); + unsigned int hashent = hashfn(sb, qid); struct dquot *dquot = NULL, *empty = NULL; - if (!sb_has_quota_active(sb, type)) + if (!sb_has_quota_active(sb, qid.type)) return NULL; we_slept: spin_lock(&dq_list_lock); spin_lock(&dq_state_lock); - if (!sb_has_quota_active(sb, type)) { + if (!sb_has_quota_active(sb, qid.type)) { spin_unlock(&dq_state_lock); spin_unlock(&dq_list_lock); goto out; } spin_unlock(&dq_state_lock); - dquot = find_dquot(hashent, sb, id, type); + dquot = find_dquot(hashent, sb, qid); if (!dquot) { if (!empty) { spin_unlock(&dq_list_lock); - empty = get_empty_dquot(sb, type); + empty = get_empty_dquot(sb, qid.type); if (!empty) schedule(); /* Try to wait for a moment... */ goto we_slept; @@ -1158,7 +1157,7 @@ static int need_print_warning(struct dquot_warn *warn) switch (warn->w_dq_id.type) { case USRQUOTA: - return current_fsuid() == warn->w_dq_id.uid; + return uid_eq(current_fsuid(), warn->w_dq_id.uid); case GRPQUOTA: return in_group_p(warn->w_dq_id.gid); } @@ -1898,9 +1897,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (!dquot_active(inode)) return 0; - if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) + if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(iattr->ia_uid)); - if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) + if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid)); ret = __dquot_transfer(inode, transfer_to); diff --git a/init/Kconfig b/init/Kconfig index 9c8aa8c49443..296d48b26034 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -927,7 +927,6 @@ config UIDGID_CONVERTED # Features depends on IMA = n depends on EVM = n - depends on QUOTA = n # Networking depends on NET_9P = n -- cgit v1.2.3 From 84f4141ee3ea11035f741d4298cb6bbad1850fcf Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 18 Sep 2012 11:27:22 -0500 Subject: jfs: Fix do_div precision in commit b40c2e66 In a hasty fix to replace a 64-bit division with do_div, I unintentionally assigned the divisor to a 32-bit variable. Signed-off-by: Dave Kleikamp Cc: Tino Reichardt --- fs/jfs/jfs_dmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 174feb6a73c1..9a55f53be5ff 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1641,14 +1641,15 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) /* max blkno / nblocks pairs to trim */ int count = 0, range_cnt; + u64 max_ranges; /* prevent others from writing new stuff here, while trimming */ IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); nblocks = bmp->db_agfree[agno]; - range_cnt = nblocks; - do_div(range_cnt, (int)minlen); - range_cnt = min(range_cnt + 1, 32 * 1024); + max_ranges = nblocks; + do_div(max_ranges, minlen); + range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); if (totrim == NULL) { jfs_error(bmp->db_ipbmap->i_sb, -- cgit v1.2.3 From b5e2368baeddf401bf3da9e364fc1c96676279cd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 18 Sep 2012 13:33:44 -0400 Subject: ext4: re-enable -o discard functionality in no-journal mode This is a revert of commit b56ff9d397ce, which removed the call to ext4_issue_discard() to fix a BUG reported because ext4_issue_discard() was being called from inside a block group spinlock. As it turns out this bug had already been fixed by Lukas Czerner in commit 53fdcf992d61 by the simple expedient of moving when we call ext4_issue_discard() outside the spinlock. So it should be safe to re-enable this functionality, which I tested by putting an BUG_ON(in_atomic) just after the restored callsite to ext4_issue_discard(). Addresses-Google-Bug: #6750518 Signed-off-by: "Theodore Ts'o" Cc: Anatol Pomozov --- fs/ext4/mballoc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2102c20f7e98..2c7c082b8169 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4656,6 +4656,8 @@ do_more: * with group lock held. generate_buddy look at * them with group lock_held */ + if (test_opt(sb, DISCARD)) + ext4_issue_discard(sb, block_group, bit, count); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); mb_free_blocks(inode, &e4b, bit, count_clusters); -- cgit v1.2.3 From c9b92530a723ac5ef8e352885a1862b18f31b2f5 Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Tue, 18 Sep 2012 13:38:59 -0400 Subject: ext4: make orphan functions be no-op in no-journal mode Instead of checking whether the handle is valid, we check if journal is enabled. This avoids taking the s_orphan_lock mutex in all cases when there is no journal in use, including the error paths where ext4_orphan_del() is called with a handle set to NULL. Signed-off-by: Anatol Pomozov Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 37c03b32e194..8f4bda75122e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2369,7 +2369,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0, rc; - if (!ext4_handle_valid(handle)) + if (!EXT4_SB(sb)->s_journal) return 0; mutex_lock(&EXT4_SB(sb)->s_orphan_lock); @@ -2443,8 +2443,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0; - /* ext4_handle_valid() assumes a valid handle_t pointer */ - if (handle && !ext4_handle_valid(handle)) + if (!EXT4_SB(inode->i_sb)->s_journal) return 0; mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); @@ -2463,7 +2462,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. */ - if (sbi->s_journal && !handle) + if (!handle) goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); -- cgit v1.2.3 From b161dfa6937ae46d50adce8a7c6b12233e96e7bd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 17 Sep 2012 22:31:38 +0200 Subject: vfs: dcache: use DCACHE_DENTRY_KILLED instead of DCACHE_DISCONNECTED in d_kill() IBM reported a soft lockup after applying the fix for the rename_lock deadlock. Commit c83ce989cb5f ("VFS: Fix the nfs sillyrename regression in kernel 2.6.38") was found to be the culprit. The nfs sillyrename fix used DCACHE_DISCONNECTED to indicate that the dentry was killed. This flag can be set on non-killed dentries too, which results in infinite retries when trying to traverse the dentry tree. This patch introduces a separate flag: DCACHE_DENTRY_KILLED, which is only set in d_kill() and makes try_to_ascend() test only this flag. IBM reported successful test results with this patch. Signed-off-by: Miklos Szeredi Cc: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/dcache.c | 4 ++-- include/linux/dcache.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 8086636bf796..16521a9f2038 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -389,7 +389,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) * Inform try_to_ascend() that we are no longer attached to the * dentry tree */ - dentry->d_flags |= DCACHE_DISCONNECTED; + dentry->d_flags |= DCACHE_DENTRY_KILLED; if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1048,7 +1048,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq * or deletion */ if (new != old->d_parent || - (old->d_flags & DCACHE_DISCONNECTED) || + (old->d_flags & DCACHE_DENTRY_KILLED) || (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&new->d_lock); new = NULL; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index caa34e50537e..59200795482e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -206,6 +206,8 @@ struct dentry_operations { #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) +#define DCACHE_DENTRY_KILLED 0x100000 + extern seqlock_t rename_lock; static inline int dname_external(struct dentry *dentry) -- cgit v1.2.3 From c73f693989d7a7d99ec66a7065295a0c93d0b127 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 14:21:01 -0400 Subject: cifs: fix return value in cifsConvertToUTF16 This function returns the wrong value, which causes the callers to get the length of the resulting pathname wrong when it contains non-ASCII characters. This seems to fix https://bugzilla.samba.org/show_bug.cgi?id=6767 Cc: Reported-by: Baldvin Kovacs Reported-and-Tested-by: Nicolas Lefebvre Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 7dab9c04ad52..53cf2aabce87 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -328,7 +328,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, } ctoUTF16_out: - return i; + return j; } #ifdef CONFIG_CIFS_SMB2 -- cgit v1.2.3 From 0ba6e5368c302819da7aff537e0d7eff2616c6a6 Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Thu, 13 Sep 2012 16:18:47 -0500 Subject: xfs: stop the sync worker before xfs_unmountfs Cancel work of the xfs_sync_worker before teardown of the log in xfs_unmountfs. This prevents occasional crashes on unmount like so: PID: 21602 TASK: ee9df060 CPU: 0 COMMAND: "kworker/0:3" #0 [c5377d28] crash_kexec at c0292c94 #1 [c5377d80] oops_end at c07090c2 #2 [c5377d98] no_context at c06f614e #3 [c5377dbc] __bad_area_nosemaphore at c06f6281 #4 [c5377df4] bad_area_nosemaphore at c06f629b #5 [c5377e00] do_page_fault at c070b0cb #6 [c5377e7c] error_code (via page_fault) at c070892c EAX: f300c6a8 EBX: f300c6a8 ECX: 000000c0 EDX: 000000c0 EBP: c5377ed0 DS: 007b ESI: 00000000 ES: 007b EDI: 00000001 GS: ffffad20 CS: 0060 EIP: c0481ad0 ERR: ffffffff EFLAGS: 00010246 #7 [c5377eb0] atomic64_read_cx8 at c0481ad0 #8 [c5377ebc] xlog_assign_tail_lsn_locked at f7cc7c6e [xfs] #9 [c5377ed4] xfs_trans_ail_delete_bulk at f7ccd520 [xfs] #10 [c5377f0c] xfs_buf_iodone at f7ccb602 [xfs] #11 [c5377f24] xfs_buf_do_callbacks at f7cca524 [xfs] #12 [c5377f30] xfs_buf_iodone_callbacks at f7cca5da [xfs] #13 [c5377f4c] xfs_buf_iodone_work at f7c718d0 [xfs] #14 [c5377f58] process_one_work at c024ee4c #15 [c5377f98] worker_thread at c024f43d #16 [c5377fbc] kthread at c025326b #17 [c5377fe8] kernel_thread_helper at c070e834 PID: 26653 TASK: e79143b0 CPU: 3 COMMAND: "umount" #0 [cde0fda0] __schedule at c0706595 #1 [cde0fe28] schedule at c0706b89 #2 [cde0fe30] schedule_timeout at c0705600 #3 [cde0fe94] __down_common at c0706098 #4 [cde0fec8] __down at c0706122 #5 [cde0fed0] down at c025936f #6 [cde0fee0] xfs_buf_lock at f7c7131d [xfs] #7 [cde0ff00] xfs_freesb at f7cc2236 [xfs] #8 [cde0ff10] xfs_fs_put_super at f7c80f21 [xfs] #9 [cde0ff1c] generic_shutdown_super at c0333d7a #10 [cde0ff38] kill_block_super at c0333e0f #11 [cde0ff48] deactivate_locked_super at c0334218 #12 [cde0ff58] deactivate_super at c033495d #13 [cde0ff68] mntput_no_expire at c034bc13 #14 [cde0ff7c] sys_umount at c034cc69 #15 [cde0ffa0] sys_oldumount at c034ccd4 #16 [cde0ffb0] system_call at c0707e66 commit 11159a05 added this to xfs_log_unmount and needs to be cleaned up at a later date. Signed-off-by: Ben Myers Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely --- fs/xfs/xfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bdaf4cb9f4a2..19e2380fb867 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -919,6 +919,7 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); xfs_filestream_unmount(mp); + cancel_delayed_work_sync(&mp->m_sync_work); xfs_unmountfs(mp); xfs_syncd_stop(mp); xfs_freesb(mp); -- cgit v1.2.3 From 59e31c156a24d483bbd2ea07d4dc96043a55b6bc Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Sep 2012 00:55:56 -0400 Subject: ext4: fix online resizing when the # of block groups is constant Commit 1c6bd7173d66b3 introduced a regression where an online resize operation which did not change the number of block groups would fail, i.e: mke2fs -t /dev/vdc 60000 mount /dev/vdc resize2fs /dev/vdc 60001 This was due to a bug in the logic regarding when to try converting the filesystem to use meta_bg. Also fix up a number of other minor issues with the online resizing code: (a) Fix a sparse warning; (b) only check to make sure the device is large enough once, instead of multiple times through the resize loop. Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3c9367b9bebd..ee985ca05100 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1772,23 +1772,18 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) handle_t *handle; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - struct ext4_inode_info *ei = 0; + struct ext4_inode_info *ei = EXT4_I(inode); ext4_fsblk_t nr; int i, ret, err = 0; int credits = 1; ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (inode) { if (es->s_reserved_gdt_blocks) { ext4_error(sb, "Unexpected non-zero " "s_reserved_gdt_blocks"); return -EPERM; } - if (!inode) { - ext4_error(sb, "Unexpected NULL resize_inode"); - return -EPERM; - } - ei = EXT4_I(inode); /* Do a quick sanity check of the resize inode */ if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) @@ -1873,12 +1868,19 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; int meta_bg; + /* See if the device is actually as big as what was requested */ + bh = sb_bread(sb, n_blocks_count - 1); + if (!bh) { + ext4_warning(sb, "can't read last block, resize aborted"); + return -ENOSPC; + } + brelse(bh); + retry: o_blocks_count = ext4_blocks_count(es); - if (test_opt(sb, DEBUG)) - ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " - "to %llu blocks", o_blocks_count, n_blocks_count); + ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu " + "to %llu blocks", o_blocks_count, n_blocks_count); if (n_blocks_count < o_blocks_count) { /* On-line shrinking not supported */ @@ -1922,7 +1924,7 @@ retry: } } - if ((!resize_inode && !meta_bg) || n_group == o_group) { + if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) { err = ext4_convert_meta_bg(sb, resize_inode); if (err) goto out; @@ -1937,14 +1939,6 @@ retry: } } - /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, n_blocks_count - 1); - if (!bh) { - ext4_warning(sb, "can't read last block, resize aborted"); - return -ENOSPC; - } - brelse(bh); - /* extend the last group */ if (n_group == o_group) add = n_blocks_count - o_blocks_count; @@ -2005,8 +1999,6 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - if (test_opt(sb, DEBUG)) - ext4_msg(sb, KERN_DEBUG, "resized filesystem to %llu", - n_blocks_count); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); return err; } -- cgit v1.2.3 From 18888cf0883c286f238d44ee565530fe82752f06 Mon Sep 17 00:00:00 2001 From: Andrey Sidorov Date: Wed, 19 Sep 2012 14:14:53 -0400 Subject: ext4: speed up truncate/unlink by not using bforget() unless needed Do not iterate over data blocks scanning for bh's to forget as they're never exist. This improves time taken by unlink / truncate syscall. Tested by continuously truncating file that is being written by dd. Another test is rm -rf of linux tree while tar unpacks it. With ordered data mode condition unlikely(!tbh) was always met in ext4_free_blocks. With journal data mode tbh was found only few times, so optimisation is also possible. Unlinking fallocated 60G file after doing sync && echo 3 > /proc/sys/vm/drop_caches && time rm --help X86 before (linux 3.6-rc4): # time rm -f test1 real 0m2.710s user 0m0.000s sys 0m1.530s X86 after: # time rm -f test1 real 0m0.644s user 0m0.003s sys 0m0.060s MIPS before (linux 2.6.37): # time rm -f test1 real 0m 4.93s user 0m 0.00s sys 0m 4.61s MIPS after: # time rm -f test1 real 0m 0.16s user 0m 0.00s sys 0m 0.06s Signed-off-by: "Theodore Ts'o" Signed-off-by: Andrey Sidorov --- fs/ext4/extents.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cc6d2b984e8f..a510917c175a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2318,10 +2318,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned short ee_len = ext4_ext_get_actual_len(ex); ext4_fsblk_t pblk; - int flags = EXT4_FREE_BLOCKS_FORGET; + int flags = 0; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; + flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; + else if (ext4_should_journal_data(inode)) + flags |= EXT4_FREE_BLOCKS_FORGET; + /* * For bigalloc file systems, we never free a partial cluster * at the beginning of the extent. Instead, we make a note -- cgit v1.2.3 From 00d4e7362ed01987183e9528295de3213031309c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Sep 2012 22:42:36 -0400 Subject: ext4: fix potential deadlock in ext4_nonda_switch() In ext4_nonda_switch(), if the file system is getting full we used to call writeback_inodes_sb_if_idle(). The problem is that we can be holding i_mutex already, and this causes a potential deadlock when writeback_inodes_sb_if_idle() when it tries to take s_umount. (See lockdep output below). As it turns out we don't need need to hold s_umount; the fact that we are in the middle of the write(2) system call will keep the superblock pinned. Unfortunately writeback_inodes_sb() checks to make sure s_umount is taken, and the VFS uses a different mechanism for making sure the file system doesn't get unmounted out from under us. The simplest way of dealing with this is to just simply grab s_umount using a trylock, and skip kicking the writeback flusher thread in the very unlikely case that we can't take a read lock on s_umount without blocking. Also, we now check the cirteria for kicking the writeback thread before we decide to whether to fall back to non-delayed writeback, so if there are any outstanding delayed allocation writes, we try to get them resolved as soon as possible. [ INFO: possible circular locking dependency detected ] 3.6.0-rc1-00042-gce894ca #367 Not tainted ------------------------------------------------------- dd/8298 is trying to acquire lock: (&type->s_umount_key#18){++++..}, at: [] writeback_inodes_sb_if_idle+0x28/0x46 but task is already holding lock: (&sb->s_type->i_mutex_key#8){+.+...}, at: [] generic_file_aio_write+0x5f/0xd3 which lock already depends on the new lock. 2 locks held by dd/8298: #0: (sb_writers#2){.+.+.+}, at: [] generic_file_aio_write+0x56/0xd3 #1: (&sb->s_type->i_mutex_key#8){+.+...}, at: [] generic_file_aio_write+0x5f/0xd3 stack backtrace: Pid: 8298, comm: dd Not tainted 3.6.0-rc1-00042-gce894ca #367 Call Trace: [] ? console_unlock+0x345/0x372 [] print_circular_bug+0x190/0x19d [] __lock_acquire+0x86d/0xb6c [] ? mark_held_locks+0x5c/0x7b [] lock_acquire+0x66/0xb9 [] ? writeback_inodes_sb_if_idle+0x28/0x46 [] down_read+0x28/0x58 [] ? writeback_inodes_sb_if_idle+0x28/0x46 [] writeback_inodes_sb_if_idle+0x28/0x46 [] ext4_nonda_switch+0xe1/0xf4 [] ext4_da_write_begin+0x27/0x193 [] generic_file_buffered_write+0xc8/0x1bb [] __generic_file_aio_write+0x1dd/0x205 [] generic_file_aio_write+0x78/0xd3 [] ext4_file_write+0x480/0x4a6 [] ? __lock_acquire+0x41e/0xb6c [] ? sched_clock_cpu+0x11a/0x13e [] ? trace_hardirqs_off+0xb/0xd [] ? local_clock+0x37/0x4e [] do_sync_write+0x67/0x9d [] ? wait_on_retry_sync_kiocb+0x44/0x44 [] vfs_write+0x7b/0xe6 [] sys_write+0x3b/0x64 [] syscall_call+0x7/0xb Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 17 ++++++++++------- fs/fs-writeback.c | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ca76b5ed6c9e..0a31197590d7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2462,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb) free_blocks = EXT4_C2B(sbi, percpu_counter_read_positive(&sbi->s_freeclusters_counter)); dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); + /* + * Start pushing delalloc when 1/2 of free blocks are dirty. + */ + if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && + !writeback_in_progress(sb->s_bdi) && + down_read_trylock(&sb->s_umount)) { + writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); + up_read(&sb->s_umount); + } + if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { /* @@ -2470,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb) */ return 1; } - /* - * Even if we don't switch but are nearing capacity, - * start pushing delalloc when 1/2 of free blocks are dirty. - */ - if (free_blocks < 2 * dirty_blocks) - writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); - return 0; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be3efc4f64f4..5602d73d4ec7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi) { return test_bit(BDI_writeback_running, &bdi->state); } +EXPORT_SYMBOL(writeback_in_progress); static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { -- cgit v1.2.3 From 170782eb89462d30302cec12378253115b492b38 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:25:39 -0800 Subject: userns: Convert fat to use kuid/kgid where appropriate Acked-by: OGAWA Hirofumi Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/fat/fat.h | 4 ++-- fs/fat/file.c | 6 +++--- fs/fat/inode.c | 18 ++++++++++++------ init/Kconfig | 1 - 4 files changed, 17 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 2deeeb86f331..7d8e0dcac5d5 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -23,8 +23,8 @@ #define FAT_ERRORS_RO 3 /* remount r/o on error */ struct fat_mount_options { - uid_t fs_uid; - gid_t fs_gid; + kuid_t fs_uid; + kgid_t fs_gid; unsigned short fs_fmask; unsigned short fs_dmask; unsigned short codepage; /* Codepage for shortname conversions */ diff --git a/fs/fat/file.c b/fs/fat/file.c index e007b8bd8e5e..a62e0ecbe2db 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -352,7 +352,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) { umode_t allow_utime = sbi->options.allow_utime; - if (current_fsuid() != inode->i_uid) { + if (!uid_eq(current_fsuid(), inode->i_uid)) { if (in_group_p(inode->i_gid)) allow_utime >>= 3; if (allow_utime & MAY_WRITE) @@ -407,9 +407,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) } if (((attr->ia_valid & ATTR_UID) && - (attr->ia_uid != sbi->options.fs_uid)) || + (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) || ((attr->ia_valid & ATTR_GID) && - (attr->ia_gid != sbi->options.fs_gid)) || + (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) || ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~FAT_VALID_MODE))) error = -EPERM; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 05e897fe9866..47d9eb0be886 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -791,10 +791,12 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) struct fat_mount_options *opts = &sbi->options; int isvfat = opts->isvfat; - if (opts->fs_uid != 0) - seq_printf(m, ",uid=%u", opts->fs_uid); - if (opts->fs_gid != 0) - seq_printf(m, ",gid=%u", opts->fs_gid); + if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->fs_uid)); + if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->fs_gid)); seq_printf(m, ",fmask=%04o", opts->fs_fmask); seq_printf(m, ",dmask=%04o", opts->fs_dmask); if (opts->allow_utime) @@ -1037,12 +1039,16 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, case Opt_uid: if (match_int(&args[0], &option)) return 0; - opts->fs_uid = option; + opts->fs_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(opts->fs_uid)) + return 0; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; - opts->fs_gid = option; + opts->fs_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(opts->fs_gid)) + return 0; break; case Opt_umask: if (match_octal(&args[0], &option)) diff --git a/init/Kconfig b/init/Kconfig index 296d48b26034..60bdff259b27 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -952,7 +952,6 @@ config UIDGID_CONVERTED depends on ECRYPT_FS = n depends on EFS_FS = n depends on EXOFS_FS = n - depends on FAT_FS = n depends on FUSE_FS = n depends on GFS2_FS = n depends on HFS_FS = n -- cgit v1.2.3 From bef53b01faeb791e27605cba1a71ba21364cb23e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 20 Sep 2012 11:35:38 -0400 Subject: ext4: remove erroneous ext4_superblock_csum_set() in update_backups() The update_backups() function is used to backup all the metadata blocks, so we should not take it for granted that 'data' is pointed to a super block and use ext4_superblock_csum_set to calculate the checksum there. In case where the data is a group descriptor block, it will corrupt the last group descriptor, and then e2fsck will complain about it it. As all the metadata checksums should already be OK when we do the backup, remove the wrong ext4_superblock_csum_set and it should be just fine. Reported-by: "Theodore Ts'o" Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ee985ca05100..9f821ce39800 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1049,8 +1049,6 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, goto exit_err; } - ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); - if (meta_bg == 0) { group = ext4_list_backups(sb, &three, &five, &seven); last = sbi->s_groups_count; -- cgit v1.2.3 From 80c9d03c22f13a17df67b4b99a83ed5e9acf6093 Mon Sep 17 00:00:00 2001 From: Chuansheng Liu Date: Tue, 18 Sep 2012 01:43:44 +0800 Subject: pstore: Avoid recursive spinlocks in the oops_in_progress case Like 8250 driver, when pstore is registered as a console, to avoid recursive spinlocks when panic happening, change the spin_lock_irqsave to spin_trylock_irqsave when oops_in_progress is true. Signed-off-by: liu chuansheng Signed-off-by: Anton Vorontsov --- fs/pstore/platform.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 6c23eab7f76c..a40da07e93d6 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -164,7 +164,13 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) if (c > psinfo->bufsize) c = psinfo->bufsize; - spin_lock_irqsave(&psinfo->buf_lock, flags); + + if (oops_in_progress) { + if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) + break; + } else { + spin_lock_irqsave(&psinfo->buf_lock, flags); + } memcpy(psinfo->buf, s, c); psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); -- cgit v1.2.3 From 44a075bde9ab32d13bb2ff89f3e72fd869f272c4 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Fri, 21 Sep 2012 08:21:01 +0800 Subject: btrfs: fix the commment for the action flags in delayed-ref.h The action field has been merged into struct btrfs_delayed_ref_node, and no struct btrfs_delayed_ref is available now. Signed-off-by: Wang Sheng-Hui Signed-off-by: Jiri Kosina --- fs/btrfs/delayed-ref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 413927fb9957..25589456d1c9 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -18,7 +18,7 @@ #ifndef __DELAYED_REF__ #define __DELAYED_REF__ -/* these are the possible values of struct btrfs_delayed_ref->action */ +/* these are the possible values of struct btrfs_delayed_ref_node->action */ #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ -- cgit v1.2.3 From 7dfd8cc536dc4984ee92d9a8863ea705b955bc37 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 20 Sep 2012 15:49:57 +0800 Subject: fs/fs-writeback.c: cleanup riteback_sb_inodes kerneldoc Argument @only_this_sb has been removed. Signed-off-by: Liu Bo Signed-off-by: Jiri Kosina --- fs/fs-writeback.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8f660dd6137a..3451d3769598 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -582,10 +582,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi, /* * Write a portion of b_io inodes which belong to @sb. * - * If @only_this_sb is true, then find and write all such - * inodes. Otherwise write only ones which go sequentially - * in reverse order. - * * Return the number of pages and/or inodes written. */ static long writeback_sb_inodes(struct super_block *sb, -- cgit v1.2.3 From a0eb3a05a8cbe9cd1a41dde3d1b2e5bcc10634f2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:19:25 -0800 Subject: userns: Convert hugetlbfs to use kuid/kgid where appropriate Note sysctl_hugetlb_shm_group can only be written in the root user in the initial user namespace, so we can assume sysctl_hugetlb_shm_group is in the initial user namespace. Cc: William Irwin Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/hugetlbfs/inode.c | 16 +++++++++++----- init/Kconfig | 1 - 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8349a899912e..6e572c4fbf68 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -42,8 +42,8 @@ static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; struct hugetlbfs_config { - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; umode_t mode; long nr_blocks; long nr_inodes; @@ -785,13 +785,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) case Opt_uid: if (match_int(&args[0], &option)) goto bad_val; - pconfig->uid = option; + pconfig->uid = make_kuid(current_user_ns(), option); + if (!uid_valid(pconfig->uid)) + goto bad_val; break; case Opt_gid: if (match_int(&args[0], &option)) goto bad_val; - pconfig->gid = option; + pconfig->gid = make_kgid(current_user_ns(), option); + if (!gid_valid(pconfig->gid)) + goto bad_val; break; case Opt_mode: @@ -924,7 +928,9 @@ static struct vfsmount *hugetlbfs_vfsmount; static int can_do_hugetlb_shm(void) { - return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); + kgid_t shm_group; + shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group); + return capable(CAP_IPC_LOCK) || in_group_p(shm_group); } struct file *hugetlb_file_setup(const char *name, unsigned long addr, diff --git a/init/Kconfig b/init/Kconfig index 96007af67645..7ee6e19632dd 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -954,7 +954,6 @@ config UIDGID_CONVERTED depends on HFS_FS = n depends on HFSPLUS_FS = n depends on HPFS_FS = n - depends on HUGETLBFS = n depends on ISO9660_FS = n depends on JFFS2_FS = n depends on JFS_FS = n -- cgit v1.2.3 From c010d1ff4f69c9f4aa331dfd8266262fb1b478ce Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 15:58:38 -0800 Subject: userns: Convert adfs to use kuid and kgid where appropriate Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/adfs/adfs.h | 4 ++-- fs/adfs/inode.c | 4 ++-- fs/adfs/super.c | 21 +++++++++++++-------- init/Kconfig | 1 - 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 718ac1f440c6..585adafb0cc2 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -46,8 +46,8 @@ struct adfs_sb_info { struct adfs_discmap *s_map; /* bh list containing map */ struct adfs_dir_ops *s_dir; /* directory operations */ - uid_t s_uid; /* owner uid */ - gid_t s_gid; /* owner gid */ + kuid_t s_uid; /* owner uid */ + kgid_t s_gid; /* owner gid */ umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ umode_t s_other_mask; /* ADFS other perm -> unix perm */ int s_ftsuffix; /* ,xyz hex filetype suffix option */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 1dab6a174d6a..e9bad5093a3f 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -304,8 +304,8 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr) * we can't change the UID or GID of any file - * we have a global UID/GID in the superblock */ - if ((ia_valid & ATTR_UID && attr->ia_uid != ADFS_SB(sb)->s_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != ADFS_SB(sb)->s_gid)) + if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, ADFS_SB(sb)->s_uid)) || + (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, ADFS_SB(sb)->s_gid))) error = -EPERM; if (error) diff --git a/fs/adfs/super.c b/fs/adfs/super.c index bdaec92353c2..22a0d7ed5fa1 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "adfs.h" #include "dir_f.h" #include "dir_fplus.h" @@ -130,10 +131,10 @@ static int adfs_show_options(struct seq_file *seq, struct dentry *root) { struct adfs_sb_info *asb = ADFS_SB(root->d_sb); - if (asb->s_uid != 0) - seq_printf(seq, ",uid=%u", asb->s_uid); - if (asb->s_gid != 0) - seq_printf(seq, ",gid=%u", asb->s_gid); + if (!uid_eq(asb->s_uid, GLOBAL_ROOT_UID)) + seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, asb->s_uid)); + if (!gid_eq(asb->s_gid, GLOBAL_ROOT_GID)) + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, asb->s_gid)); if (asb->s_owner_mask != ADFS_DEFAULT_OWNER_MASK) seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) @@ -175,12 +176,16 @@ static int parse_options(struct super_block *sb, char *options) case Opt_uid: if (match_int(args, &option)) return -EINVAL; - asb->s_uid = option; + asb->s_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(asb->s_uid)) + return -EINVAL; break; case Opt_gid: if (match_int(args, &option)) return -EINVAL; - asb->s_gid = option; + asb->s_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(asb->s_gid)) + return -EINVAL; break; case Opt_ownmask: if (match_octal(args, &option)) @@ -369,8 +374,8 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = asb; /* set default options */ - asb->s_uid = 0; - asb->s_gid = 0; + asb->s_uid = GLOBAL_ROOT_UID; + asb->s_gid = GLOBAL_ROOT_GID; asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; asb->s_ftsuffix = 0; diff --git a/init/Kconfig b/init/Kconfig index 485c60ae2aed..20ddf0d7dec0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -933,7 +933,6 @@ config UIDGID_CONVERTED # Filesystems depends on 9P_FS = n - depends on ADFS_FS = n depends on AFFS_FS = n depends on AFS_FS = n depends on AUTOFS4_FS = n -- cgit v1.2.3 From 31aba059bb9522ff730301a7d567117fcff0952c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 10:51:24 -0800 Subject: userns: Convert befs to use kuid/kgid where appropriate Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/befs/befs.h | 4 ++-- fs/befs/linuxvfs.c | 27 +++++++++++++++++++-------- init/Kconfig | 1 - 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/befs/befs.h b/fs/befs/befs.h index d9a40abda6b7..b26642839156 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h @@ -20,8 +20,8 @@ typedef u64 befs_blocknr_t; */ typedef struct befs_mount_options { - gid_t gid; - uid_t uid; + kgid_t gid; + kuid_t uid; int use_gid; int use_uid; int debug; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cf7f3c67c8b7..7f73a692bfd0 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "befs.h" #include "btree.h" @@ -352,9 +353,11 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) */ inode->i_uid = befs_sb->mount_opts.use_uid ? - befs_sb->mount_opts.uid : (uid_t) fs32_to_cpu(sb, raw_inode->uid); + befs_sb->mount_opts.uid : + make_kuid(&init_user_ns, fs32_to_cpu(sb, raw_inode->uid)); inode->i_gid = befs_sb->mount_opts.use_gid ? - befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid); + befs_sb->mount_opts.gid : + make_kgid(&init_user_ns, fs32_to_cpu(sb, raw_inode->gid)); set_nlink(inode, 1); @@ -674,10 +677,12 @@ parse_options(char *options, befs_mount_options * opts) char *p; substring_t args[MAX_OPT_ARGS]; int option; + kuid_t uid; + kgid_t gid; /* Initialize options */ - opts->uid = 0; - opts->gid = 0; + opts->uid = GLOBAL_ROOT_UID; + opts->gid = GLOBAL_ROOT_GID; opts->use_uid = 0; opts->use_gid = 0; opts->iocharset = NULL; @@ -696,23 +701,29 @@ parse_options(char *options, befs_mount_options * opts) case Opt_uid: if (match_int(&args[0], &option)) return 0; - if (option < 0) { + uid = INVALID_UID; + if (option >= 0) + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) { printk(KERN_ERR "BeFS: Invalid uid %d, " "using default\n", option); break; } - opts->uid = option; + opts->uid = uid; opts->use_uid = 1; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; - if (option < 0) { + gid = INVALID_GID; + if (option >= 0) + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) { printk(KERN_ERR "BeFS: Invalid gid %d, " "using default\n", option); break; } - opts->gid = option; + opts->gid = gid; opts->use_gid = 1; break; case Opt_charset: diff --git a/init/Kconfig b/init/Kconfig index 20ddf0d7dec0..d9bb344140d0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -936,7 +936,6 @@ config UIDGID_CONVERTED depends on AFFS_FS = n depends on AFS_FS = n depends on AUTOFS4_FS = n - depends on BEFS_FS = n depends on BFS_FS = n depends on BTRFS_FS = n depends on CEPH_FS = n -- cgit v1.2.3 From a7d9cfe97b450b27c82e6e41b2fde6214708560d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:06:08 -0800 Subject: userns: Convert cramfs to use kuid/kgid where appropriate Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/cramfs/inode.c | 4 ++-- init/Kconfig | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 28cca01ca9c9..c6c3f91ecf06 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -90,8 +90,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb, } inode->i_mode = cramfs_inode->mode; - inode->i_uid = cramfs_inode->uid; - inode->i_gid = cramfs_inode->gid; + i_uid_write(inode, cramfs_inode->uid); + i_gid_write(inode, cramfs_inode->gid); /* if the lower 2 bits are zero, the inode contains data */ if (!(inode->i_ino & 3)) { diff --git a/init/Kconfig b/init/Kconfig index d9bb344140d0..5f846b57a750 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -941,7 +941,6 @@ config UIDGID_CONVERTED depends on CEPH_FS = n depends on CIFS = n depends on CODA_FS = n - depends on CRAMFS = n depends on ECRYPT_FS = n depends on EFS_FS = n depends on EXOFS_FS = n -- cgit v1.2.3 From cdf8c58a3546443073cab77d07a88ee439319c19 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:24:33 -0800 Subject: userns: Convert ecryptfs to use kuid/kgid where appropriate Cc: Tyler Hicks Cc: Dustin Kirkland Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/ecryptfs/main.c | 5 +++-- fs/ecryptfs/messaging.c | 5 ++--- init/Kconfig | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 2768138eefee..1d6ce91b7061 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -544,11 +544,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out_free; } - if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) { + if (check_ruid && !uid_eq(path.dentry->d_inode->i_uid, current_uid())) { rc = -EPERM; printk(KERN_ERR "Mount of device (uid: %d) not owned by " "requested user (uid: %d)\n", - path.dentry->d_inode->i_uid, current_uid()); + i_uid_read(path.dentry->d_inode), + from_kuid(&init_user_ns, current_uid())); goto out_free; } diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index b29bb8bfa8d9..5fa2471796c2 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -33,7 +33,7 @@ static struct hlist_head *ecryptfs_daemon_hash; struct mutex ecryptfs_daemon_hash_mux; static int ecryptfs_hash_bits; #define ecryptfs_current_euid_hash(uid) \ - hash_long((unsigned long)current_euid(), ecryptfs_hash_bits) + hash_long((unsigned long)from_kuid(&init_user_ns, current_euid()), ecryptfs_hash_bits) static u32 ecryptfs_msg_counter; static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; @@ -121,8 +121,7 @@ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon) hlist_for_each_entry(*daemon, elem, &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], euid_chain) { - if ((*daemon)->file->f_cred->euid == current_euid() && - (*daemon)->file->f_cred->user_ns == current_user_ns()) { + if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) { rc = 0; goto out; } diff --git a/init/Kconfig b/init/Kconfig index 5f846b57a750..6120bae29aea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -941,7 +941,6 @@ config UIDGID_CONVERTED depends on CEPH_FS = n depends on CIFS = n depends on CODA_FS = n - depends on ECRYPT_FS = n depends on EFS_FS = n depends on EXOFS_FS = n depends on FUSE_FS = n -- cgit v1.2.3 From 5d4ea4da6a6fb5c91e433db4addf3bb23866d821 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:10:33 -0800 Subject: userns: Convert efs to use kuid/kgid where appropriate Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/efs/inode.c | 4 ++-- init/Kconfig | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/efs/inode.c b/fs/efs/inode.c index bc84f365d75c..f3913eb2c474 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -97,8 +97,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) inode->i_mode = be16_to_cpu(efs_inode->di_mode); set_nlink(inode, be16_to_cpu(efs_inode->di_nlink)); - inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid); - inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid); + i_uid_write(inode, (uid_t)be16_to_cpu(efs_inode->di_uid)); + i_gid_write(inode, (gid_t)be16_to_cpu(efs_inode->di_gid)); inode->i_size = be32_to_cpu(efs_inode->di_size); inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime); inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime); diff --git a/init/Kconfig b/init/Kconfig index 6120bae29aea..51084b02263f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -941,7 +941,6 @@ config UIDGID_CONVERTED depends on CEPH_FS = n depends on CIFS = n depends on CODA_FS = n - depends on EFS_FS = n depends on EXOFS_FS = n depends on FUSE_FS = n depends on GFS2_FS = n -- cgit v1.2.3 From d001b0536562f816af7eb9947e49de58f504958a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:11:19 -0800 Subject: userns: Convert exofs to use kuid/kgid where appropriate Cc: Benny Halevy Acked-by: Boaz Harrosh Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/exofs/inode.c | 8 ++++---- init/Kconfig | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 5badb0c039de..190c3d69e569 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1163,8 +1163,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) /* copy stuff from on-disk struct to in-memory struct */ inode->i_mode = le16_to_cpu(fcb.i_mode); - inode->i_uid = le32_to_cpu(fcb.i_uid); - inode->i_gid = le32_to_cpu(fcb.i_gid); + i_uid_write(inode, le32_to_cpu(fcb.i_uid)); + i_gid_write(inode, le32_to_cpu(fcb.i_gid)); set_nlink(inode, le16_to_cpu(fcb.i_links_count)); inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); @@ -1376,8 +1376,8 @@ static int exofs_update_inode(struct inode *inode, int do_sync) fcb = &args->fcb; fcb->i_mode = cpu_to_le16(inode->i_mode); - fcb->i_uid = cpu_to_le32(inode->i_uid); - fcb->i_gid = cpu_to_le32(inode->i_gid); + fcb->i_uid = cpu_to_le32(i_uid_read(inode)); + fcb->i_gid = cpu_to_le32(i_gid_read(inode)); fcb->i_links_count = cpu_to_le16(inode->i_nlink); fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); diff --git a/init/Kconfig b/init/Kconfig index 51084b02263f..39e55d614f2c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -941,7 +941,6 @@ config UIDGID_CONVERTED depends on CEPH_FS = n depends on CIFS = n depends on CODA_FS = n - depends on EXOFS_FS = n depends on FUSE_FS = n depends on GFS2_FS = n depends on HFS_FS = n -- cgit v1.2.3 From 43b5e4ccd463a5b42e0531c32628ac462081cab8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:26:59 -0800 Subject: userns: Convert hfs to use kuid and kgid where appropriate Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/hfs/hfs_fs.h | 4 ++-- fs/hfs/inode.c | 4 ++-- fs/hfs/super.c | 16 +++++++++++++--- init/Kconfig | 1 - 4 files changed, 17 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 8275175acf6e..693df9fe52b2 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -134,8 +134,8 @@ struct hfs_sb_info { permissions on all files */ umode_t s_dir_umask; /* The umask applied to the permissions on all dirs */ - uid_t s_uid; /* The uid of all files */ - gid_t s_gid; /* The gid of all files */ + kuid_t s_uid; /* The uid of all files */ + kgid_t s_gid; /* The gid of all files */ int session, part; struct nls_table *nls_io, *nls_disk; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index ee1bc55677f1..5d5c22da1960 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -594,9 +594,9 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) /* no uig/gid changes and limit which mode bits can be set */ if (((attr->ia_valid & ATTR_UID) && - (attr->ia_uid != hsb->s_uid)) || + (!uid_eq(attr->ia_uid, hsb->s_uid))) || ((attr->ia_valid & ATTR_GID) && - (attr->ia_gid != hsb->s_gid)) || + (!gid_eq(attr->ia_gid, hsb->s_gid))) || ((attr->ia_valid & ATTR_MODE) && ((S_ISDIR(inode->i_mode) && (attr->ia_mode != inode->i_mode)) || diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4eb873e0c07b..0b63d135a092 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -138,7 +138,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); - seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid); + seq_printf(seq, ",uid=%u,gid=%u", + from_kuid_munged(&init_user_ns, sbi->s_uid), + from_kgid_munged(&init_user_ns, sbi->s_gid)); if (sbi->s_file_umask != 0133) seq_printf(seq, ",file_umask=%o", sbi->s_file_umask); if (sbi->s_dir_umask != 0022) @@ -254,14 +256,22 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) printk(KERN_ERR "hfs: uid requires an argument\n"); return 0; } - hsb->s_uid = (uid_t)tmp; + hsb->s_uid = make_kuid(current_user_ns(), (uid_t)tmp); + if (!uid_valid(hsb->s_uid)) { + printk(KERN_ERR "hfs: invalid uid %d\n", tmp); + return 0; + } break; case opt_gid: if (match_int(&args[0], &tmp)) { printk(KERN_ERR "hfs: gid requires an argument\n"); return 0; } - hsb->s_gid = (gid_t)tmp; + hsb->s_gid = make_kgid(current_user_ns(), (gid_t)tmp); + if (!gid_valid(hsb->s_gid)) { + printk(KERN_ERR "hfs: invalid gid %d\n", tmp); + return 0; + } break; case opt_umask: if (match_octal(&args[0], &tmp)) { diff --git a/init/Kconfig b/init/Kconfig index 39e55d614f2c..6038d64b64b6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -943,7 +943,6 @@ config UIDGID_CONVERTED depends on CODA_FS = n depends on FUSE_FS = n depends on GFS2_FS = n - depends on HFS_FS = n depends on HFSPLUS_FS = n depends on HPFS_FS = n depends on ISO9660_FS = n -- cgit v1.2.3 From 16525e3f146fbba1ae43740c7d3895d4f396a768 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:27:17 -0800 Subject: userns: Convert hfsplus to use kuid and kgid where appropriate Signed-off-by: Eric W. Biederman --- fs/hfsplus/catalog.c | 4 ++-- fs/hfsplus/hfsplus_fs.h | 4 ++-- fs/hfsplus/inode.c | 8 ++++---- fs/hfsplus/options.c | 15 ++++++++++++--- init/Kconfig | 1 - 5 files changed, 20 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index ec2a9c23f0c9..798d9c4c5e71 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -80,8 +80,8 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->userflags = HFSPLUS_I(inode)->userflags; perms->mode = cpu_to_be16(inode->i_mode); - perms->owner = cpu_to_be32(inode->i_uid); - perms->group = cpu_to_be32(inode->i_gid); + perms->owner = cpu_to_be32(i_uid_read(inode)); + perms->group = cpu_to_be32(i_gid_read(inode)); if (S_ISREG(inode->i_mode)) perms->dev = cpu_to_be32(inode->i_nlink); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 558dbb463a4e..c571de224b15 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -149,8 +149,8 @@ struct hfsplus_sb_info { u32 type; umode_t umask; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; int part, session; unsigned long flags; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 3d8b4a675ba0..2172aa5976f5 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -233,12 +233,12 @@ static void hfsplus_get_perms(struct inode *inode, mode = be16_to_cpu(perms->mode); - inode->i_uid = be32_to_cpu(perms->owner); - if (!inode->i_uid && !mode) + i_uid_write(inode, be32_to_cpu(perms->owner)); + if (!i_uid_read(inode) && !mode) inode->i_uid = sbi->uid; - inode->i_gid = be32_to_cpu(perms->group); - if (!inode->i_gid && !mode) + i_gid_write(inode, be32_to_cpu(perms->group)); + if (!i_gid_read(inode) && !mode) inode->i_gid = sbi->gid; if (dir) { diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 06fa5618600c..ed257c671615 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -135,14 +135,22 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) printk(KERN_ERR "hfs: uid requires an argument\n"); return 0; } - sbi->uid = (uid_t)tmp; + sbi->uid = make_kuid(current_user_ns(), (uid_t)tmp); + if (!uid_valid(sbi->uid)) { + printk(KERN_ERR "hfs: invalid uid specified\n"); + return 0; + } break; case opt_gid: if (match_int(&args[0], &tmp)) { printk(KERN_ERR "hfs: gid requires an argument\n"); return 0; } - sbi->gid = (gid_t)tmp; + sbi->gid = make_kgid(current_user_ns(), (gid_t)tmp); + if (!gid_valid(sbi->gid)) { + printk(KERN_ERR "hfs: invalid gid specified\n"); + return 0; + } break; case opt_part: if (match_int(&args[0], &sbi->part)) { @@ -215,7 +223,8 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root) if (sbi->type != HFSPLUS_DEF_CR_TYPE) seq_printf(seq, ",type=%.4s", (char *)&sbi->type); seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, - sbi->uid, sbi->gid); + from_kuid_munged(&init_user_ns, sbi->uid), + from_kgid_munged(&init_user_ns, sbi->gid)); if (sbi->part >= 0) seq_printf(seq, ",part=%u", sbi->part); if (sbi->session >= 0) diff --git a/init/Kconfig b/init/Kconfig index 6038d64b64b6..9cf8cb1f618b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -943,7 +943,6 @@ config UIDGID_CONVERTED depends on CODA_FS = n depends on FUSE_FS = n depends on GFS2_FS = n - depends on HFSPLUS_FS = n depends on HPFS_FS = n depends on ISO9660_FS = n depends on JFFS2_FS = n -- cgit v1.2.3 From ba64e2b9e368fbe588ed5e3bb1494cc1dc4664a4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:35:50 -0800 Subject: userns: Convert isofs to use kuid/kgid where appropriate Signed-off-by: Eric W. Biederman --- fs/isofs/inode.c | 17 +++++++++++------ fs/isofs/isofs.h | 4 ++-- fs/isofs/rock.c | 4 ++-- init/Kconfig | 1 - 4 files changed, 15 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 29037c365ba4..a7d8e6cc5e0c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "isofs.h" #include "zisofs.h" @@ -171,8 +172,8 @@ struct iso9660_options{ unsigned int blocksize; umode_t fmode; umode_t dmode; - gid_t gid; - uid_t uid; + kgid_t gid; + kuid_t uid; char *iocharset; /* LVE */ s32 session; @@ -383,8 +384,8 @@ static int parse_options(char *options, struct iso9660_options *popt) popt->fmode = popt->dmode = ISOFS_INVALID_MODE; popt->uid_set = 0; popt->gid_set = 0; - popt->gid = 0; - popt->uid = 0; + popt->gid = GLOBAL_ROOT_GID; + popt->uid = GLOBAL_ROOT_UID; popt->iocharset = NULL; popt->utf8 = 0; popt->overriderockperm = 0; @@ -460,13 +461,17 @@ static int parse_options(char *options, struct iso9660_options *popt) case Opt_uid: if (match_int(&args[0], &option)) return 0; - popt->uid = option; + popt->uid = make_kuid(current_user_ns(), option); + if (!uid_valid(popt->uid)) + return 0; popt->uid_set = 1; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; - popt->gid = option; + popt->gid = make_kgid(current_user_ns(), option); + if (!gid_valid(popt->gid)) + return 0; popt->gid_set = 1; break; case Opt_mode: diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 3620ad1ea9bc..99167238518d 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -52,8 +52,8 @@ struct isofs_sb_info { umode_t s_fmode; umode_t s_dmode; - gid_t s_gid; - uid_t s_uid; + kgid_t s_gid; + kuid_t s_uid; struct nls_table *s_nls_iocharset; /* Native language support table */ }; diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 70e79d0c756a..c0bf42472e40 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -364,8 +364,8 @@ repeat: case SIG('P', 'X'): inode->i_mode = isonum_733(rr->u.PX.mode); set_nlink(inode, isonum_733(rr->u.PX.n_links)); - inode->i_uid = isonum_733(rr->u.PX.uid); - inode->i_gid = isonum_733(rr->u.PX.gid); + i_uid_write(inode, isonum_733(rr->u.PX.uid)); + i_gid_write(inode, isonum_733(rr->u.PX.gid)); break; case SIG('P', 'N'): { diff --git a/init/Kconfig b/init/Kconfig index 9cf8cb1f618b..88f6571d4dd4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -944,7 +944,6 @@ config UIDGID_CONVERTED depends on FUSE_FS = n depends on GFS2_FS = n depends on HPFS_FS = n - depends on ISO9660_FS = n depends on JFFS2_FS = n depends on JFS_FS = n depends on LOGFS = n -- cgit v1.2.3 From 1a0a994ebe851206d02469782da6c1f9a0547d7d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:41:28 -0800 Subject: userns: Convert logfs to use kuid/kgid where appropriate Cc: Joern Engel Cc: Prasad Joshi Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/logfs/inode.c | 4 ++-- fs/logfs/readwrite.c | 8 ++++---- init/Kconfig | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index a422f42238b2..43f61c2013f9 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -192,8 +192,8 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode) li->li_height = 0; li->li_used_bytes = 0; li->li_block = NULL; - inode->i_uid = 0; - inode->i_gid = 0; + i_uid_write(inode, 0); + i_gid_write(inode, 0); inode->i_size = 0; inode->i_blocks = 0; inode->i_ctime = CURRENT_TIME; diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index f1cb512c5019..a8d492d69213 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -119,8 +119,8 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode) inode->i_mode = be16_to_cpu(di->di_mode); li->li_height = di->di_height; li->li_flags = be32_to_cpu(di->di_flags); - inode->i_uid = be32_to_cpu(di->di_uid); - inode->i_gid = be32_to_cpu(di->di_gid); + i_uid_write(inode, be32_to_cpu(di->di_uid)); + i_gid_write(inode, be32_to_cpu(di->di_gid)); inode->i_size = be64_to_cpu(di->di_size); logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes)); inode->i_atime = be64_to_timespec(di->di_atime); @@ -156,8 +156,8 @@ static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di) di->di_height = li->li_height; di->di_pad = 0; di->di_flags = cpu_to_be32(li->li_flags); - di->di_uid = cpu_to_be32(inode->i_uid); - di->di_gid = cpu_to_be32(inode->i_gid); + di->di_uid = cpu_to_be32(i_uid_read(inode)); + di->di_gid = cpu_to_be32(i_gid_read(inode)); di->di_size = cpu_to_be64(i_size_read(inode)); di->di_used_bytes = cpu_to_be64(li->li_used_bytes); di->di_atime = timespec_to_be64(inode->i_atime); diff --git a/init/Kconfig b/init/Kconfig index 88f6571d4dd4..80edba85ece1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -946,7 +946,6 @@ config UIDGID_CONVERTED depends on HPFS_FS = n depends on JFFS2_FS = n depends on JFS_FS = n - depends on LOGFS = n depends on MINIX_FS = n depends on NCP_FS = n depends on NFSD = n -- cgit v1.2.3 From f303bdc55e9e74890eadc836c1edd8b5c21a7b89 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:45:03 -0800 Subject: userns: Convert minix to use kuid/kgid where appropriate Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/minix/inode.c | 16 ++++++++-------- init/Kconfig | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2a503ad020d5..d0e42c678923 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -460,8 +460,8 @@ static struct inode *V1_minix_iget(struct inode *inode) return ERR_PTR(-EIO); } inode->i_mode = raw_inode->i_mode; - inode->i_uid = (uid_t)raw_inode->i_uid; - inode->i_gid = (gid_t)raw_inode->i_gid; + i_uid_write(inode, raw_inode->i_uid); + i_gid_write(inode, raw_inode->i_gid); set_nlink(inode, raw_inode->i_nlinks); inode->i_size = raw_inode->i_size; inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; @@ -493,8 +493,8 @@ static struct inode *V2_minix_iget(struct inode *inode) return ERR_PTR(-EIO); } inode->i_mode = raw_inode->i_mode; - inode->i_uid = (uid_t)raw_inode->i_uid; - inode->i_gid = (gid_t)raw_inode->i_gid; + i_uid_write(inode, raw_inode->i_uid); + i_gid_write(inode, raw_inode->i_gid); set_nlink(inode, raw_inode->i_nlinks); inode->i_size = raw_inode->i_size; inode->i_mtime.tv_sec = raw_inode->i_mtime; @@ -545,8 +545,8 @@ static struct buffer_head * V1_minix_update_inode(struct inode * inode) if (!raw_inode) return NULL; raw_inode->i_mode = inode->i_mode; - raw_inode->i_uid = fs_high2lowuid(inode->i_uid); - raw_inode->i_gid = fs_high2lowgid(inode->i_gid); + raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode)); + raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode)); raw_inode->i_nlinks = inode->i_nlink; raw_inode->i_size = inode->i_size; raw_inode->i_time = inode->i_mtime.tv_sec; @@ -572,8 +572,8 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) if (!raw_inode) return NULL; raw_inode->i_mode = inode->i_mode; - raw_inode->i_uid = fs_high2lowuid(inode->i_uid); - raw_inode->i_gid = fs_high2lowgid(inode->i_gid); + raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode)); + raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode)); raw_inode->i_nlinks = inode->i_nlink; raw_inode->i_size = inode->i_size; raw_inode->i_mtime = inode->i_mtime.tv_sec; diff --git a/init/Kconfig b/init/Kconfig index 80edba85ece1..c69e085ea810 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -946,7 +946,6 @@ config UIDGID_CONVERTED depends on HPFS_FS = n depends on JFFS2_FS = n depends on JFS_FS = n - depends on MINIX_FS = n depends on NCP_FS = n depends on NFSD = n depends on NFS_FS = n -- cgit v1.2.3 From 305d3d0dbc22aa62fa4d8b9224826fd8c7104bc5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 12:31:23 -0800 Subject: userns: Convert nillfs2 to use kuid/kgid where appropriate Acked-by: Ryusuke Konishi Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/nilfs2/inode.c | 8 ++++---- init/Kconfig | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6e2c3db976b2..4d31d2cca7fd 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -401,8 +401,8 @@ int nilfs_read_inode_common(struct inode *inode, int err; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); - inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); + i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); + i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); @@ -590,8 +590,8 @@ void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode_info *ii = NILFS_I(inode); raw_inode->i_mode = cpu_to_le16(inode->i_mode); - raw_inode->i_uid = cpu_to_le32(inode->i_uid); - raw_inode->i_gid = cpu_to_le32(inode->i_gid); + raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); + raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); diff --git a/init/Kconfig b/init/Kconfig index c69e085ea810..90c9e06b66a5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -949,7 +949,6 @@ config UIDGID_CONVERTED depends on NCP_FS = n depends on NFSD = n depends on NFS_FS = n - depends on NILFS2_FS = n depends on NTFS_FS = n depends on OCFS2_FS = n depends on OMFS_FS = n -- cgit v1.2.3 From b29f7751c9a880e842e48f421daf313b997ddd65 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:29:36 -0800 Subject: userns: Convert ntfs to use kuid and kgid where appropriate Cc: Anton Altaparmakov Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/ntfs/inode.c | 7 ++++--- fs/ntfs/super.c | 39 ++++++++++++++++++++++++++++++++------- fs/ntfs/volume.h | 5 +++-- init/Kconfig | 1 - 4 files changed, 39 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index c6dbd3db6ca8..1d27331e6fc9 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2124,7 +2124,8 @@ int ntfs_read_inode_mount(struct inode *vi) * ntfs_read_inode() will have set up the default ones. */ /* Set uid and gid to root. */ - vi->i_uid = vi->i_gid = 0; + vi->i_uid = GLOBAL_ROOT_UID; + vi->i_gid = GLOBAL_ROOT_GID; /* Regular file. No access for anyone. */ vi->i_mode = S_IFREG; /* No VFS initiated operations allowed for $MFT. */ @@ -2312,8 +2313,8 @@ int ntfs_show_options(struct seq_file *sf, struct dentry *root) ntfs_volume *vol = NTFS_SB(root->d_sb); int i; - seq_printf(sf, ",uid=%i", vol->uid); - seq_printf(sf, ",gid=%i", vol->gid); + seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid)); + seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid)); if (vol->fmask == vol->dmask) seq_printf(sf, ",umask=0%o", vol->fmask); else { diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 2bc149d6a784..da01c165067d 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -102,8 +102,8 @@ static bool parse_options(ntfs_volume *vol, char *opt) char *p, *v, *ov; static char *utf8 = "utf8"; int errors = 0, sloppy = 0; - uid_t uid = (uid_t)-1; - gid_t gid = (gid_t)-1; + kuid_t uid = INVALID_UID; + kgid_t gid = INVALID_GID; umode_t fmask = (umode_t)-1, dmask = (umode_t)-1; int mft_zone_multiplier = -1, on_errors = -1; int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; @@ -128,6 +128,30 @@ static bool parse_options(ntfs_volume *vol, char *opt) if (*v) \ goto needs_val; \ } +#define NTFS_GETOPT_UID(option, variable) \ + if (!strcmp(p, option)) { \ + uid_t uid_value; \ + if (!v || !*v) \ + goto needs_arg; \ + uid_value = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + variable = make_kuid(current_user_ns(), uid_value); \ + if (!uid_valid(variable)) \ + goto needs_val; \ + } +#define NTFS_GETOPT_GID(option, variable) \ + if (!strcmp(p, option)) { \ + gid_t gid_value; \ + if (!v || !*v) \ + goto needs_arg; \ + gid_value = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + variable = make_kgid(current_user_ns(), gid_value); \ + if (!gid_valid(variable)) \ + goto needs_val; \ + } #define NTFS_GETOPT_OCTAL(option, variable) \ if (!strcmp(p, option)) { \ if (!v || !*v) \ @@ -165,8 +189,8 @@ static bool parse_options(ntfs_volume *vol, char *opt) while ((p = strsep(&opt, ","))) { if ((v = strchr(p, '='))) *v++ = 0; - NTFS_GETOPT("uid", uid) - else NTFS_GETOPT("gid", gid) + NTFS_GETOPT_UID("uid", uid) + else NTFS_GETOPT_GID("gid", gid) else NTFS_GETOPT_OCTAL("umask", fmask = dmask) else NTFS_GETOPT_OCTAL("fmask", fmask) else NTFS_GETOPT_OCTAL("dmask", dmask) @@ -283,9 +307,9 @@ no_mount_options: vol->on_errors = on_errors; if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) vol->on_errors |= ON_ERRORS_CONTINUE; - if (uid != (uid_t)-1) + if (uid_valid(uid)) vol->uid = uid; - if (gid != (gid_t)-1) + if (gid_valid(gid)) vol->gid = gid; if (fmask != (umode_t)-1) vol->fmask = fmask; @@ -1023,7 +1047,8 @@ static bool load_and_init_mft_mirror(ntfs_volume *vol) * ntfs_read_inode() will have set up the default ones. */ /* Set uid and gid to root. */ - tmp_ino->i_uid = tmp_ino->i_gid = 0; + tmp_ino->i_uid = GLOBAL_ROOT_UID; + tmp_ino->i_gid = GLOBAL_ROOT_GID; /* Regular file. No access for anyone. */ tmp_ino->i_mode = S_IFREG; /* No VFS initiated operations allowed for $MFTMirr. */ diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h index 15e3ba8d521a..4f579b02bc76 100644 --- a/fs/ntfs/volume.h +++ b/fs/ntfs/volume.h @@ -25,6 +25,7 @@ #define _LINUX_NTFS_VOLUME_H #include +#include #include "types.h" #include "layout.h" @@ -46,8 +47,8 @@ typedef struct { sized blocks on the device. */ /* Configuration provided by user at mount time. */ unsigned long flags; /* Miscellaneous flags, see below. */ - uid_t uid; /* uid that files will be mounted as. */ - gid_t gid; /* gid that files will be mounted as. */ + kuid_t uid; /* uid that files will be mounted as. */ + kgid_t gid; /* gid that files will be mounted as. */ umode_t fmask; /* The mask for file permissions. */ umode_t dmask; /* The mask for directory permissions. */ diff --git a/init/Kconfig b/init/Kconfig index 90c9e06b66a5..0f65a0231585 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -949,7 +949,6 @@ config UIDGID_CONVERTED depends on NCP_FS = n depends on NFSD = n depends on NFS_FS = n - depends on NTFS_FS = n depends on OCFS2_FS = n depends on OMFS_FS = n depends on QNX4FS_FS = n -- cgit v1.2.3 From 80fcbe751f01bea34759bebd3d213c4ee244a719 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:29:49 -0800 Subject: userns: Convert omfs to use kuid and kgid where appropriate Acked-by: Bob Copeland Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/omfs/inode.c | 8 ++++++-- fs/omfs/omfs.h | 4 ++-- init/Kconfig | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index e6213b3725d1..25d715c7c87a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -391,12 +391,16 @@ static int parse_options(char *options, struct omfs_sb_info *sbi) case Opt_uid: if (match_int(&args[0], &option)) return 0; - sbi->s_uid = option; + sbi->s_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(sbi->s_uid)) + return 0; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; - sbi->s_gid = option; + sbi->s_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(sbi->s_gid)) + return 0; break; case Opt_umask: if (match_octal(&args[0], &option)) diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h index 8941f12c6b01..f0f8bc75e609 100644 --- a/fs/omfs/omfs.h +++ b/fs/omfs/omfs.h @@ -19,8 +19,8 @@ struct omfs_sb_info { unsigned long **s_imap; int s_imap_size; struct mutex s_bitmap_lock; - int s_uid; - int s_gid; + kuid_t s_uid; + kgid_t s_gid; int s_dmask; int s_fmask; }; diff --git a/init/Kconfig b/init/Kconfig index 0f65a0231585..390e6295b188 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -950,7 +950,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on OMFS_FS = n depends on QNX4FS_FS = n depends on QNX6FS_FS = n depends on REISERFS_FS = n -- cgit v1.2.3 From 511728d778e8d441e42df68101722548a0c97fe3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 12:11:12 -0800 Subject: userns: Convert the qnx4 filesystem to use kuid/kgid where appropriate Acked-by: Anders Larsen Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/qnx4/inode.c | 4 ++-- init/Kconfig | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 552e994e3aa1..5c3c7b02e17b 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -312,8 +312,8 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) (ino % QNX4_INODES_PER_BLOCK); inode->i_mode = le16_to_cpu(raw_inode->di_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid); + i_uid_write(inode, (uid_t)le16_to_cpu(raw_inode->di_uid)); + i_gid_write(inode, (gid_t)le16_to_cpu(raw_inode->di_gid)); set_nlink(inode, le16_to_cpu(raw_inode->di_nlink)); inode->i_size = le32_to_cpu(raw_inode->di_size); inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime); diff --git a/init/Kconfig b/init/Kconfig index 390e6295b188..b9d6be5ebd70 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -950,7 +950,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on QNX4FS_FS = n depends on QNX6FS_FS = n depends on REISERFS_FS = n depends on SQUASHFS = n -- cgit v1.2.3 From 85a03d1bba3b9132adc1aa8f4b8b9e11bac429ec Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Apr 2012 17:58:48 -0700 Subject: userns: Convert the qnx6 filesystem to use kuid/kgid where appropriate Cc: Kai Bankett Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/qnx6/inode.c | 4 ++-- init/Kconfig | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 2049c814bda4..f4eef0b5e7b5 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -574,8 +574,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino) raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs; inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode); - inode->i_uid = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid); - inode->i_gid = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid); + i_uid_write(inode, (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid)); + i_gid_write(inode, (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid)); inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size); inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime); inode->i_mtime.tv_nsec = 0; diff --git a/init/Kconfig b/init/Kconfig index b9d6be5ebd70..1ff9f4156575 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -950,7 +950,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on QNX6FS_FS = n depends on REISERFS_FS = n depends on SQUASHFS = n depends on SYSV_FS = n -- cgit v1.2.3 From a726ecce75896b2045f1646a2d7e31a5562b25a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 12:19:23 -0800 Subject: userns: Convert the sysv filesystem to use kuid/kgid where appropriate Cc: Christoph Hellwig Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/sysv/inode.c | 8 ++++---- init/Kconfig | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 80e1e2b18df1..b23ab736685d 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -202,8 +202,8 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino) } /* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */ inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); - inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid); - inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid); + i_uid_write(inode, (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid)); + i_gid_write(inode, (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid)); set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink)); inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); @@ -256,8 +256,8 @@ static int __sysv_write_inode(struct inode *inode, int wait) } raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); - raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); - raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); + raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(i_uid_read(inode))); + raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(i_gid_read(inode))); raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink); raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size); raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec); diff --git a/init/Kconfig b/init/Kconfig index 1ff9f4156575..3427832fb4af 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -952,7 +952,6 @@ config UIDGID_CONVERTED depends on OCFS2_FS = n depends on REISERFS_FS = n depends on SQUASHFS = n - depends on SYSV_FS = n depends on UBIFS_FS = n depends on UDF_FS = n depends on UFS_FS = n -- cgit v1.2.3 From 2f83ffa874ccb6ea4596220188d3e54b20203124 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:26:34 -0800 Subject: userns: Convert freevxfs to use kuid/kgid where appropriate Cc: Christoph Hellwig Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/freevxfs/vxfs_inode.c | 4 ++-- init/Kconfig | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ef67c95f12d4..f47df72cef17 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -224,8 +224,8 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) { ip->i_mode = vxfs_transmod(vip); - ip->i_uid = (uid_t)vip->vii_uid; - ip->i_gid = (gid_t)vip->vii_gid; + i_uid_write(ip, (uid_t)vip->vii_uid); + i_gid_write(ip, (gid_t)vip->vii_gid); set_nlink(ip, vip->vii_nlink); ip->i_size = vip->vii_size; diff --git a/init/Kconfig b/init/Kconfig index 3427832fb4af..355b1af6932e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -955,7 +955,6 @@ config UIDGID_CONVERTED depends on UBIFS_FS = n depends on UDF_FS = n depends on UFS_FS = n - depends on VXFS_FS = n depends on XFS_FS = n depends on !UML || HOSTFS = n -- cgit v1.2.3 From 29f82ae56e8798f7907d60145e0186082800d130 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:28:57 -0800 Subject: userns: Convert hostfs to use kuid and kgid where appropriate Cc: Jeff Dike Cc: Richard Weinberger Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/hostfs/hostfs_kern.c | 8 ++++---- init/Kconfig | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 124146543aa7..6c9f3a9d5e21 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -542,8 +542,8 @@ static int read_name(struct inode *ino, char *name) ino->i_ino = st.ino; ino->i_mode = st.mode; set_nlink(ino, st.nlink); - ino->i_uid = st.uid; - ino->i_gid = st.gid; + i_uid_write(ino, st.uid); + i_gid_write(ino, st.gid); ino->i_atime = st.atime; ino->i_mtime = st.mtime; ino->i_ctime = st.ctime; @@ -808,11 +808,11 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_UID) { attrs.ia_valid |= HOSTFS_ATTR_UID; - attrs.ia_uid = attr->ia_uid; + attrs.ia_uid = from_kuid(&init_user_ns, attr->ia_uid); } if (attr->ia_valid & ATTR_GID) { attrs.ia_valid |= HOSTFS_ATTR_GID; - attrs.ia_gid = attr->ia_gid; + attrs.ia_gid = from_kgid(&init_user_ns, attr->ia_gid); } if (attr->ia_valid & ATTR_SIZE) { attrs.ia_valid |= HOSTFS_ATTR_SIZE; diff --git a/init/Kconfig b/init/Kconfig index 381f765df923..8450442da6cd 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -957,8 +957,6 @@ config UIDGID_CONVERTED depends on UFS_FS = n depends on XFS_FS = n - depends on !UML || HOSTFS = n - # The rare drivers that won't build depends on ANDROID_BINDER_IPC = n -- cgit v1.2.3 From d2b31ca644fdc8704de3367a6a56a5c958c77f53 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 1 Jun 2012 16:14:19 -0600 Subject: userns: Teach security_path_chown to take kuids and kgids Don't make the security modules deal with raw user space uid and gids instead pass in a kuid_t and a kgid_t so that security modules only have to deal with internal kernel uids and gids. Cc: Al Viro Cc: James Morris Cc: John Johansen Cc: Kentaro Takeda Cc: Tetsuo Handa Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/open.c | 2 +- include/linux/security.h | 6 +++--- security/apparmor/lsm.c | 2 +- security/capability.c | 2 +- security/security.c | 2 +- security/tomoyo/tomoyo.c | 12 +++++++----- 6 files changed, 14 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index f3d96e7e7b19..2b2573980d0f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -534,7 +534,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&inode->i_mutex); - error = security_path_chown(path, user, group); + error = security_path_chown(path, uid, gid); if (!error) error = notify_change(path->dentry, &newattrs); mutex_unlock(&inode->i_mutex); diff --git a/include/linux/security.h b/include/linux/security.h index 4e5a73cdbbef..ebb92cb1fa28 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1437,7 +1437,7 @@ struct security_operations { int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int (*path_chmod) (struct path *path, umode_t mode); - int (*path_chown) (struct path *path, uid_t uid, gid_t gid); + int (*path_chown) (struct path *path, kuid_t uid, kgid_t gid); int (*path_chroot) (struct path *path); #endif @@ -2832,7 +2832,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_chmod(struct path *path, umode_t mode); -int security_path_chown(struct path *path, uid_t uid, gid_t gid); +int security_path_chown(struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) @@ -2888,7 +2888,7 @@ static inline int security_path_chmod(struct path *path, umode_t mode) return 0; } -static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) +static inline int security_path_chown(struct path *path, kuid_t uid, kgid_t gid) { return 0; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8ea39aabe948..8c2a7f6b35e2 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -352,7 +352,7 @@ static int apparmor_path_chmod(struct path *path, umode_t mode) return common_perm_mnt_dentry(OP_CHMOD, path->mnt, path->dentry, AA_MAY_CHMOD); } -static int apparmor_path_chown(struct path *path, uid_t uid, gid_t gid) +static int apparmor_path_chown(struct path *path, kuid_t uid, kgid_t gid) { struct path_cond cond = { path->dentry->d_inode->i_uid, path->dentry->d_inode->i_mode diff --git a/security/capability.c b/security/capability.c index 61095df8b89a..a40aac677c72 100644 --- a/security/capability.c +++ b/security/capability.c @@ -284,7 +284,7 @@ static int cap_path_chmod(struct path *path, umode_t mode) return 0; } -static int cap_path_chown(struct path *path, uid_t uid, gid_t gid) +static int cap_path_chown(struct path *path, kuid_t uid, kgid_t gid) { return 0; } diff --git a/security/security.c b/security/security.c index 860aeb349cb3..f9a2f2ef2454 100644 --- a/security/security.c +++ b/security/security.c @@ -434,7 +434,7 @@ int security_path_chmod(struct path *path, umode_t mode) return security_ops->path_chmod(path, mode); } -int security_path_chown(struct path *path, uid_t uid, gid_t gid) +int security_path_chown(struct path *path, kuid_t uid, kgid_t gid) { if (unlikely(IS_PRIVATE(path->dentry->d_inode))) return 0; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index c2d04a50f76a..d88eb3a046ed 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -373,13 +373,15 @@ static int tomoyo_path_chmod(struct path *path, umode_t mode) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_path_chown(struct path *path, uid_t uid, gid_t gid) +static int tomoyo_path_chown(struct path *path, kuid_t uid, kgid_t gid) { int error = 0; - if (uid != (uid_t) -1) - error = tomoyo_path_number_perm(TOMOYO_TYPE_CHOWN, path, uid); - if (!error && gid != (gid_t) -1) - error = tomoyo_path_number_perm(TOMOYO_TYPE_CHGRP, path, gid); + if (uid_valid(uid)) + error = tomoyo_path_number_perm(TOMOYO_TYPE_CHOWN, path, + from_kuid(&init_user_ns, uid)); + if (!error && gid_valid(gid)) + error = tomoyo_path_number_perm(TOMOYO_TYPE_CHGRP, path, + from_kgid(&init_user_ns, gid)); return error; } -- cgit v1.2.3 From 8fed10be0029acda5564f03b9cc1fc4cb7470bae Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:20:16 -0800 Subject: userns: Convert affs to use kuid/kgid wherwe appropriate Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/affs/affs.h | 4 ++-- fs/affs/inode.c | 20 ++++++++++---------- fs/affs/super.c | 18 +++++++++++------- init/Kconfig | 1 - 4 files changed, 23 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 6e216419f340..3952121f2f28 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -88,8 +88,8 @@ struct affs_sb_info { u32 s_root_block; /* FFS root block number. */ int s_hashsize; /* Size of hash table. */ unsigned long s_flags; /* See below. */ - uid_t s_uid; /* uid to override */ - gid_t s_gid; /* gid to override */ + kuid_t s_uid; /* uid to override */ + kgid_t s_gid; /* gid to override */ umode_t s_mode; /* mode to override */ struct buffer_head *s_root_bh; /* Cached root block. */ struct mutex s_bmlock; /* Protects bitmap access. */ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 8bc4a59f4e7e..15c484268229 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -80,17 +80,17 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) if (id == 0 || sbi->s_flags & SF_SETUID) inode->i_uid = sbi->s_uid; else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) - inode->i_uid = 0; + i_uid_write(inode, 0); else - inode->i_uid = id; + i_uid_write(inode, id); id = be16_to_cpu(tail->gid); if (id == 0 || sbi->s_flags & SF_SETGID) inode->i_gid = sbi->s_gid; else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) - inode->i_gid = 0; + i_gid_write(inode, 0); else - inode->i_gid = id; + i_gid_write(inode, id); switch (be32_to_cpu(tail->stype)) { case ST_ROOT: @@ -193,13 +193,13 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) tail->size = cpu_to_be32(inode->i_size); secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change); if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { - uid = inode->i_uid; - gid = inode->i_gid; + uid = i_uid_read(inode); + gid = i_gid_read(inode); if (AFFS_SB(sb)->s_flags & SF_MUFS) { - if (inode->i_uid == 0 || inode->i_uid == 0xFFFF) - uid = inode->i_uid ^ ~0; - if (inode->i_gid == 0 || inode->i_gid == 0xFFFF) - gid = inode->i_gid ^ ~0; + if (uid == 0 || uid == 0xFFFF) + uid = uid ^ ~0; + if (gid == 0 || gid == 0xFFFF) + gid = gid ^ ~0; } if (!(AFFS_SB(sb)->s_flags & SF_SETUID)) tail->uid = cpu_to_be16(uid); diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..966c8c06b9b3 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -188,7 +188,7 @@ static const match_table_t tokens = { }; static int -parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s32 *root, +parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, s32 *root, int *blocksize, char **prefix, char *volume, unsigned long *mount_opts) { char *p; @@ -253,13 +253,17 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s case Opt_setgid: if (match_int(&args[0], &option)) return 0; - *gid = option; + *gid = make_kgid(current_user_ns(), option); + if (!gid_valid(*gid)) + return 0; *mount_opts |= SF_SETGID; break; case Opt_setuid: if (match_int(&args[0], &option)) return 0; - *uid = option; + *uid = make_kuid(current_user_ns(), option); + if (!uid_valid(*uid)) + return 0; *mount_opts |= SF_SETUID; break; case Opt_verbose: @@ -301,8 +305,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) int num_bm; int i, j; s32 key; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; int reserved; unsigned long mount_flags; int tmp_flags; /* fix remount prototype... */ @@ -527,8 +531,8 @@ affs_remount(struct super_block *sb, int *flags, char *data) { struct affs_sb_info *sbi = AFFS_SB(sb); int blocksize; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; int mode; int reserved; int root_block; diff --git a/init/Kconfig b/init/Kconfig index 86910982b94a..a358f4acdc42 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -928,7 +928,6 @@ config UIDGID_CONVERTED # Filesystems depends on 9P_FS = n - depends on AFFS_FS = n depends on AFS_FS = n depends on AUTOFS4_FS = n depends on BFS_FS = n -- cgit v1.2.3 From 7f5b82b835ee62b365f39373e35162eb7a072c5f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 25 Apr 2012 03:57:31 -0700 Subject: userns: Convert bfs to use kuid/kgid where appropriate Cc: "Tigran A. Aivazian" Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/bfs/inode.c | 8 ++++---- init/Kconfig | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9870417c26e7..b242beba58ed 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -76,8 +76,8 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino) BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); - inode->i_uid = le32_to_cpu(di->i_uid); - inode->i_gid = le32_to_cpu(di->i_gid); + i_uid_write(inode, le32_to_cpu(di->i_uid)); + i_gid_write(inode, le32_to_cpu(di->i_gid)); set_nlink(inode, le32_to_cpu(di->i_nlink)); inode->i_size = BFS_FILESIZE(di); inode->i_blocks = BFS_FILEBLOCKS(di); @@ -139,8 +139,8 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) di->i_ino = cpu_to_le16(ino); di->i_mode = cpu_to_le32(inode->i_mode); - di->i_uid = cpu_to_le32(inode->i_uid); - di->i_gid = cpu_to_le32(inode->i_gid); + di->i_uid = cpu_to_le32(i_uid_read(inode)); + di->i_gid = cpu_to_le32(i_gid_read(inode)); di->i_nlink = cpu_to_le32(inode->i_nlink); di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); diff --git a/init/Kconfig b/init/Kconfig index a358f4acdc42..fd91d2b63875 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -930,7 +930,6 @@ config UIDGID_CONVERTED depends on 9P_FS = n depends on AFS_FS = n depends on AUTOFS4_FS = n - depends on BFS_FS = n depends on BTRFS_FS = n depends on CEPH_FS = n depends on CIFS = n -- cgit v1.2.3 From 2f2f43d3c7b1da8dba56716dd1be196b6f57bf9b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:05:07 -0800 Subject: userns: Convert btrfs to use kuid/kgid where appropriate Cc: Chris Mason Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/btrfs/delayed-inode.c | 8 ++++---- fs/btrfs/inode.c | 8 ++++---- fs/btrfs/ioctl.c | 6 +++--- init/Kconfig | 1 - 4 files changed, 11 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 335605c8ceab..f908c5180795 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1715,8 +1715,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item, struct inode *inode) { - btrfs_set_stack_inode_uid(inode_item, inode->i_uid); - btrfs_set_stack_inode_gid(inode_item, inode->i_gid); + btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode)); + btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode)); btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); btrfs_set_stack_inode_mode(inode_item, inode->i_mode); btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); @@ -1764,8 +1764,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) inode_item = &delayed_node->inode_item; - inode->i_uid = btrfs_stack_inode_uid(inode_item); - inode->i_gid = btrfs_stack_inode_gid(inode_item); + i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); + i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); inode->i_mode = btrfs_stack_inode_mode(inode_item); set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 83baec24946d..53687149c077 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2570,8 +2570,8 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item); inode->i_mode = btrfs_inode_mode(leaf, inode_item); set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); - inode->i_uid = btrfs_inode_uid(leaf, inode_item); - inode->i_gid = btrfs_inode_gid(leaf, inode_item); + i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); + i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); tspec = btrfs_inode_atime(inode_item); @@ -2649,8 +2649,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(leaf, item, inode->i_uid); - btrfs_set_inode_gid(leaf, item, inode->i_gid); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); + btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); btrfs_set_inode_mode(leaf, item, inode->i_mode); btrfs_set_inode_nlink(leaf, item, inode->i_nlink); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bc2f6ffff3cf..1292682c537f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -575,13 +575,13 @@ fail: */ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) { - uid_t fsuid = current_fsuid(); + kuid_t fsuid = current_fsuid(); if (!(dir->i_mode & S_ISVTX)) return 0; - if (inode->i_uid == fsuid) + if (uid_eq(inode->i_uid, fsuid)) return 0; - if (dir->i_uid == fsuid) + if (uid_eq(dir->i_uid, fsuid)) return 0; return !capable(CAP_FOWNER); } diff --git a/init/Kconfig b/init/Kconfig index fd91d2b63875..44f580f406d0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -930,7 +930,6 @@ config UIDGID_CONVERTED depends on 9P_FS = n depends on AFS_FS = n depends on AUTOFS4_FS = n - depends on BTRFS_FS = n depends on CEPH_FS = n depends on CIFS = n depends on CODA_FS = n -- cgit v1.2.3 From 0e1a43c71612cd0b6b50da03040c85fbf3d24211 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:27:53 -0800 Subject: userns: Convert hpfs to use kuid and kgid where appropriate Cc: Mikulas Patocka Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/hpfs/hpfs_fn.h | 4 ++-- fs/hpfs/inode.c | 19 +++++++++++-------- fs/hpfs/namei.c | 8 ++++---- fs/hpfs/super.c | 18 +++++++++++------- init/Kconfig | 1 - 5 files changed, 28 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index ac1ead194db5..7102aaecc244 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -63,8 +63,8 @@ struct hpfs_sb_info { unsigned sb_dmap; /* sector number of dnode bit map */ unsigned sb_n_free; /* free blocks for statfs, or -1 */ unsigned sb_n_free_dnodes; /* free dnodes for statfs, or -1 */ - uid_t sb_uid; /* uid from mount options */ - gid_t sb_gid; /* gid from mount options */ + kuid_t sb_uid; /* uid from mount options */ + kgid_t sb_gid; /* gid from mount options */ umode_t sb_mode; /* mode from mount options */ unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */ unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */ diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index ed671e0ea784..804a9a842cbc 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -7,6 +7,7 @@ */ #include +#include #include "hpfs_fn.h" void hpfs_init_inode(struct inode *i) @@ -60,14 +61,14 @@ void hpfs_read_inode(struct inode *i) if (hpfs_sb(i->i_sb)->sb_eas) { if ((ea = hpfs_get_ea(i->i_sb, fnode, "UID", &ea_size))) { if (ea_size == 2) { - i->i_uid = le16_to_cpu(*(__le16*)ea); + i_uid_write(i, le16_to_cpu(*(__le16*)ea)); hpfs_inode->i_ea_uid = 1; } kfree(ea); } if ((ea = hpfs_get_ea(i->i_sb, fnode, "GID", &ea_size))) { if (ea_size == 2) { - i->i_gid = le16_to_cpu(*(__le16*)ea); + i_gid_write(i, le16_to_cpu(*(__le16*)ea)); hpfs_inode->i_ea_gid = 1; } kfree(ea); @@ -149,13 +150,13 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { __le32 ea; - if ((i->i_uid != hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { - ea = cpu_to_le32(i->i_uid); + if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { + ea = cpu_to_le32(i_uid_read(i)); hpfs_set_ea(i, fnode, "UID", (char*)&ea, 2); hpfs_inode->i_ea_uid = 1; } - if ((i->i_gid != hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) { - ea = cpu_to_le32(i->i_gid); + if (!gid_eq(i->i_gid, hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) { + ea = cpu_to_le32(i_gid_read(i)); hpfs_set_ea(i, fnode, "GID", (char *)&ea, 2); hpfs_inode->i_ea_gid = 1; } @@ -261,9 +262,11 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) hpfs_lock(inode->i_sb); if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) goto out_unlock; - if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000) + if ((attr->ia_valid & ATTR_UID) && + from_kuid(&init_user_ns, attr->ia_uid) >= 0x10000) goto out_unlock; - if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000) + if ((attr->ia_valid & ATTR_GID) && + from_kgid(&init_user_ns, attr->ia_gid) >= 0x10000) goto out_unlock; if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) goto out_unlock; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index bc9082482f68..345713d2f8f3 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -91,8 +91,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inc_nlink(dir); insert_inode_hash(result); - if (result->i_uid != current_fsuid() || - result->i_gid != current_fsgid() || + if (!uid_eq(result->i_uid, current_fsuid()) || + !gid_eq(result->i_gid, current_fsgid()) || result->i_mode != (mode | S_IFDIR)) { result->i_uid = current_fsuid(); result->i_gid = current_fsgid(); @@ -179,8 +179,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b insert_inode_hash(result); - if (result->i_uid != current_fsuid() || - result->i_gid != current_fsgid() || + if (!uid_eq(result->i_uid, current_fsuid()) || + !gid_eq(result->i_gid, current_fsgid()) || result->i_mode != (mode | S_IFREG)) { result->i_uid = current_fsuid(); result->i_gid = current_fsgid(); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 706a12c083ea..a152783602d9 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -251,7 +251,7 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; -static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, +static int parse_opts(char *opts, kuid_t *uid, kgid_t *gid, umode_t *umask, int *lowercase, int *eas, int *chk, int *errs, int *chkdsk, int *timeshift) { @@ -276,12 +276,16 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, case Opt_uid: if (match_int(args, &option)) return 0; - *uid = option; + *uid = make_kuid(current_user_ns(), option); + if (!uid_valid(*uid)) + return 0; break; case Opt_gid: if (match_int(args, &option)) return 0; - *gid = option; + *gid = make_kgid(current_user_ns(), option); + if (!gid_valid(*gid)) + return 0; break; case Opt_umask: if (match_octal(args, &option)) @@ -378,8 +382,8 @@ HPFS filesystem options:\n\ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) { - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; umode_t umask; int lowercase, eas, chk, errs, chkdsk, timeshift; int o; @@ -455,8 +459,8 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) struct hpfs_sb_info *sbi; struct inode *root; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; umode_t umask; int lowercase, eas, chk, errs, chkdsk, timeshift; diff --git a/init/Kconfig b/init/Kconfig index 44f580f406d0..1db1b0f5605e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -935,7 +935,6 @@ config UIDGID_CONVERTED depends on CODA_FS = n depends on FUSE_FS = n depends on GFS2_FS = n - depends on HPFS_FS = n depends on JFFS2_FS = n depends on JFS_FS = n depends on NCP_FS = n -- cgit v1.2.3 From 0cfe53d3c3875e1dd565b30737cd5c6691c00188 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:28:39 -0800 Subject: userns: Convert jffs2 to use kuid and kgid where appropriate - General routine uid/gid conversion work - When storing posix acls treat ACL_USER and ACL_GROUP separately so I can call from_kuid or from_kgid as appropriate. - When reading posix acls treat ACL_USER and ACL_GROUP separately so I can call make_kuid or make_kgid as appropriate. Cc: David Woodhouse Signed-off-by: Eric W. Biederman --- fs/jffs2/acl.c | 26 ++++++++++++++++++++------ fs/jffs2/file.c | 8 ++++---- fs/jffs2/fs.c | 24 +++++++++++++----------- fs/jffs2/os-linux.h | 4 ++-- init/Kconfig | 1 - 5 files changed, 39 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 42e4edc17a90..223283c30111 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -94,15 +94,23 @@ static struct posix_acl *jffs2_acl_from_medium(void *value, size_t size) case ACL_MASK: case ACL_OTHER: value += sizeof(struct jffs2_acl_entry_short); - acl->a_entries[i].e_id = ACL_UNDEFINED_ID; break; case ACL_USER: + value += sizeof(struct jffs2_acl_entry); + if (value > end) + goto fail; + acl->a_entries[i].e_uid = + make_kuid(&init_user_ns, + je32_to_cpu(entry->e_id)); + break; case ACL_GROUP: value += sizeof(struct jffs2_acl_entry); if (value > end) goto fail; - acl->a_entries[i].e_id = je32_to_cpu(entry->e_id); + acl->a_entries[i].e_gid = + make_kgid(&init_user_ns, + je32_to_cpu(entry->e_id)); break; default: @@ -131,13 +139,19 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) header->a_version = cpu_to_je32(JFFS2_ACL_VERSION); e = header + 1; for (i=0; i < acl->a_count; i++) { + const struct posix_acl_entry *acl_e = &acl->a_entries[i]; entry = e; - entry->e_tag = cpu_to_je16(acl->a_entries[i].e_tag); - entry->e_perm = cpu_to_je16(acl->a_entries[i].e_perm); - switch(acl->a_entries[i].e_tag) { + entry->e_tag = cpu_to_je16(acl_e->e_tag); + entry->e_perm = cpu_to_je16(acl_e->e_perm); + switch(acl_e->e_tag) { case ACL_USER: + entry->e_id = cpu_to_je32( + from_kuid(&init_user_ns, acl_e->e_uid)); + e += sizeof(struct jffs2_acl_entry); + break; case ACL_GROUP: - entry->e_id = cpu_to_je32(acl->a_entries[i].e_id); + entry->e_id = cpu_to_je32( + from_kgid(&init_user_ns, acl_e->e_gid)); e += sizeof(struct jffs2_acl_entry); break; diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index db3889ba8818..60ef3fb707ff 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -175,8 +175,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ri.ino = cpu_to_je32(f->inocache->ino); ri.version = cpu_to_je32(++f->highest_version); ri.mode = cpu_to_jemode(inode->i_mode); - ri.uid = cpu_to_je16(inode->i_uid); - ri.gid = cpu_to_je16(inode->i_gid); + ri.uid = cpu_to_je16(i_uid_read(inode)); + ri.gid = cpu_to_je16(i_gid_read(inode)); ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs)); ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds()); ri.offset = cpu_to_je32(inode->i_size); @@ -283,8 +283,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, /* Set the fields that the generic jffs2_write_inode_range() code can't find */ ri->ino = cpu_to_je32(inode->i_ino); ri->mode = cpu_to_jemode(inode->i_mode); - ri->uid = cpu_to_je16(inode->i_uid); - ri->gid = cpu_to_je16(inode->i_gid); + ri->uid = cpu_to_je16(i_uid_read(inode)); + ri->gid = cpu_to_je16(i_gid_read(inode)); ri->isize = cpu_to_je32((uint32_t)inode->i_size); ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds()); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 3d3092eda811..fe3c0527545f 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -99,8 +99,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) ri->ino = cpu_to_je32(inode->i_ino); ri->version = cpu_to_je32(++f->highest_version); - ri->uid = cpu_to_je16((ivalid & ATTR_UID)?iattr->ia_uid:inode->i_uid); - ri->gid = cpu_to_je16((ivalid & ATTR_GID)?iattr->ia_gid:inode->i_gid); + ri->uid = cpu_to_je16((ivalid & ATTR_UID)? + from_kuid(&init_user_ns, iattr->ia_uid):i_uid_read(inode)); + ri->gid = cpu_to_je16((ivalid & ATTR_GID)? + from_kgid(&init_user_ns, iattr->ia_gid):i_gid_read(inode)); if (ivalid & ATTR_MODE) ri->mode = cpu_to_jemode(iattr->ia_mode); @@ -147,8 +149,8 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) inode->i_ctime = ITIME(je32_to_cpu(ri->ctime)); inode->i_mtime = ITIME(je32_to_cpu(ri->mtime)); inode->i_mode = jemode_to_cpu(ri->mode); - inode->i_uid = je16_to_cpu(ri->uid); - inode->i_gid = je16_to_cpu(ri->gid); + i_uid_write(inode, je16_to_cpu(ri->uid)); + i_gid_write(inode, je16_to_cpu(ri->gid)); old_metadata = f->metadata; @@ -276,8 +278,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) return ERR_PTR(ret); } inode->i_mode = jemode_to_cpu(latest_node.mode); - inode->i_uid = je16_to_cpu(latest_node.uid); - inode->i_gid = je16_to_cpu(latest_node.gid); + i_uid_write(inode, je16_to_cpu(latest_node.uid)); + i_gid_write(inode, je16_to_cpu(latest_node.gid)); inode->i_size = je32_to_cpu(latest_node.isize); inode->i_atime = ITIME(je32_to_cpu(latest_node.atime)); inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); @@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r memset(ri, 0, sizeof(*ri)); /* Set OS-specific defaults for new inodes */ - ri->uid = cpu_to_je16(current_fsuid()); + ri->uid = cpu_to_je16(from_kuid(&init_user_ns, current_fsuid())); if (dir_i->i_mode & S_ISGID) { - ri->gid = cpu_to_je16(dir_i->i_gid); + ri->gid = cpu_to_je16(i_gid_read(dir_i)); if (S_ISDIR(mode)) mode |= S_ISGID; } else { - ri->gid = cpu_to_je16(current_fsgid()); + ri->gid = cpu_to_je16(from_kgid(&init_user_ns, current_fsgid())); } /* POSIX ACLs have to be processed now, at least partly. @@ -467,8 +469,8 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r set_nlink(inode, 1); inode->i_ino = je32_to_cpu(ri->ino); inode->i_mode = jemode_to_cpu(ri->mode); - inode->i_gid = je16_to_cpu(ri->gid); - inode->i_uid = je16_to_cpu(ri->uid); + i_gid_write(inode, je16_to_cpu(ri->gid)); + i_uid_write(inode, je16_to_cpu(ri->uid)); inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index bcd983d7e7f9..d200a9b8fd5e 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -27,8 +27,8 @@ struct kvec; #define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size) #define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode) -#define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid) -#define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid) +#define JFFS2_F_I_UID(f) (i_uid_read(OFNI_EDONI_2SFFJ(f))) +#define JFFS2_F_I_GID(f) (i_gid_read(OFNI_EDONI_2SFFJ(f))) #define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev) #define ITIME(sec) ((struct timespec){sec, 0}) diff --git a/init/Kconfig b/init/Kconfig index 1db1b0f5605e..cf2d50b82fe5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -935,7 +935,6 @@ config UIDGID_CONVERTED depends on CODA_FS = n depends on FUSE_FS = n depends on GFS2_FS = n - depends on JFFS2_FS = n depends on JFS_FS = n depends on NCP_FS = n depends on NFSD = n -- cgit v1.2.3 From c18cdc1a3ec643b5c6c0d65aac1a6bf8e461778f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 11:40:34 -0800 Subject: userns: Convert jfs to use kuid/kgid where appropriate Cc: Dave Kleikamp Signed-off-by: Eric W. Biederman --- fs/jfs/file.c | 4 ++-- fs/jfs/jfs_imap.c | 22 ++++++++++++---------- fs/jfs/jfs_incore.h | 8 ++++---- fs/jfs/super.c | 22 +++++++++++++++------- init/Kconfig | 1 - 5 files changed, 33 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 844f9460cb11..9d3afd157f99 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -108,8 +108,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) if (is_quota_modification(inode, iattr)) dquot_initialize(inode); - if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { rc = dquot_transfer(inode, iattr); if (rc) return rc; diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 1b6f15f191b3..6ba4006e011b 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3078,15 +3078,15 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) } set_nlink(ip, le32_to_cpu(dip->di_nlink)); - jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); - if (sbi->uid == -1) + jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid)); + if (!uid_valid(sbi->uid)) ip->i_uid = jfs_ip->saved_uid; else { ip->i_uid = sbi->uid; } - jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); - if (sbi->gid == -1) + jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid)); + if (!gid_valid(sbi->gid)) ip->i_gid = jfs_ip->saved_gid; else { ip->i_gid = sbi->gid; @@ -3150,14 +3150,16 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - if (sbi->uid == -1) - dip->di_uid = cpu_to_le32(ip->i_uid); + if (!uid_valid(sbi->uid)) + dip->di_uid = cpu_to_le32(i_uid_read(ip)); else - dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); - if (sbi->gid == -1) - dip->di_gid = cpu_to_le32(ip->i_gid); + dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns, + jfs_ip->saved_uid)); + if (!gid_valid(sbi->gid)) + dip->di_gid = cpu_to_le32(i_gid_read(ip)); else - dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); + dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, + jfs_ip->saved_gid)); jfs_get_inode_flags(jfs_ip); /* * mode2 is only needed for storing the higher order bits. diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 584a4a1a6e81..680605d7bf15 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -38,8 +38,8 @@ struct jfs_inode_info { int fileset; /* fileset number (always 16)*/ uint mode2; /* jfs-specific mode */ - uint saved_uid; /* saved for uid mount option */ - uint saved_gid; /* saved for gid mount option */ + kuid_t saved_uid; /* saved for uid mount option */ + kgid_t saved_gid; /* saved for gid mount option */ pxd_t ixpxd; /* inode extent descriptor */ dxd_t acl; /* dxd describing acl */ dxd_t ea; /* dxd describing ea */ @@ -192,8 +192,8 @@ struct jfs_sb_info { uint state; /* mount/recovery state */ unsigned long flag; /* mount time flags */ uint p_state; /* state prior to going no integrity */ - uint uid; /* uid to override on-disk uid */ - uint gid; /* gid to override on-disk gid */ + kuid_t uid; /* uid to override on-disk uid */ + kgid_t gid; /* gid to override on-disk gid */ uint umask; /* umask to override on-disk umask */ }; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..706692f24033 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -321,13 +321,19 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_uid: { char *uid = args[0].from; - sbi->uid = simple_strtoul(uid, &uid, 0); + uid_t val = simple_strtoul(uid, &uid, 0); + sbi->uid = make_kuid(current_user_ns(), val); + if (!uid_valid(sbi->uid)) + goto cleanup; break; } case Opt_gid: { char *gid = args[0].from; - sbi->gid = simple_strtoul(gid, &gid, 0); + gid_t val = simple_strtoul(gid, &gid, 0); + sbi->gid = make_kgid(current_user_ns(), val); + if (!gid_valid(sbi->gid)) + goto cleanup; break; } case Opt_umask: @@ -443,7 +449,9 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbi; sb->s_max_links = JFS_LINK_MAX; sbi->sb = sb; - sbi->uid = sbi->gid = sbi->umask = -1; + sbi->uid = INVALID_UID; + sbi->gid = INVALID_GID; + sbi->umask = -1; /* initialize the mount flag and determine the default error handler */ flag = JFS_ERR_REMOUNT_RO; @@ -617,10 +625,10 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root) { struct jfs_sb_info *sbi = JFS_SBI(root->d_sb); - if (sbi->uid != -1) - seq_printf(seq, ",uid=%d", sbi->uid); - if (sbi->gid != -1) - seq_printf(seq, ",gid=%d", sbi->gid); + if (uid_valid(sbi->uid)) + seq_printf(seq, ",uid=%d", from_kuid(&init_user_ns, sbi->uid)); + if (gid_valid(sbi->gid)) + seq_printf(seq, ",gid=%d", from_kgid(&init_user_ns, sbi->gid)); if (sbi->umask != -1) seq_printf(seq, ",umask=%03o", sbi->umask); if (sbi->flag & JFS_NOINTEGRITY) diff --git a/init/Kconfig b/init/Kconfig index cf2d50b82fe5..20d4a1bf3281 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -935,7 +935,6 @@ config UIDGID_CONVERTED depends on CODA_FS = n depends on FUSE_FS = n depends on GFS2_FS = n - depends on JFS_FS = n depends on NCP_FS = n depends on NFSD = n depends on NFS_FS = n -- cgit v1.2.3 From df814654f364369dfb2fe3c870f3544ce69aa78c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:30:06 -0800 Subject: userns: Convert reiserfs to use kuid and kgid where appropriate Cc: reiserfs-devel@vger.kernel.org Signed-off-by: Eric W. Biederman --- fs/reiserfs/inode.c | 26 +++++++++++++------------- fs/reiserfs/xattr_acl.c | 20 +++++++++++++++++--- init/Kconfig | 1 - 3 files changed, 30 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6d4268fb6c1..7119f488d9cd 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1155,8 +1155,8 @@ static void init_inode(struct inode *inode, struct treepath *path) set_inode_sd_version(inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); set_nlink(inode, sd_v1_nlink(sd)); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); + i_uid_write(inode, sd_v1_uid(sd)); + i_gid_write(inode, sd_v1_gid(sd)); inode->i_size = sd_v1_size(sd); inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); @@ -1200,9 +1200,9 @@ static void init_inode(struct inode *inode, struct treepath *path) inode->i_mode = sd_v2_mode(sd); set_nlink(inode, sd_v2_nlink(sd)); - inode->i_uid = sd_v2_uid(sd); + i_uid_write(inode, sd_v2_uid(sd)); inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); + i_gid_write(inode, sd_v2_gid(sd)); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); inode->i_ctime.tv_sec = sd_v2_ctime(sd); @@ -1258,9 +1258,9 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, inode->i_nlink); - set_sd_v2_uid(sd_v2, inode->i_uid); + set_sd_v2_uid(sd_v2, i_uid_read(inode)); set_sd_v2_size(sd_v2, size); - set_sd_v2_gid(sd_v2, inode->i_gid); + set_sd_v2_gid(sd_v2, i_gid_read(inode)); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); @@ -1280,8 +1280,8 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; set_sd_v1_mode(sd_v1, inode->i_mode); - set_sd_v1_uid(sd_v1, inode->i_uid); - set_sd_v1_gid(sd_v1, inode->i_gid); + set_sd_v1_uid(sd_v1, i_uid_read(inode)); + set_sd_v1_gid(sd_v1, i_gid_read(inode)); set_sd_v1_nlink(sd_v1, inode->i_nlink); set_sd_v1_size(sd_v1, size); set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); @@ -1869,7 +1869,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, goto out_bad_inode; } if (old_format_only(sb)) { - if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { + if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { pathrelse(&path_to_key); /* i_uid or i_gid is too big to be stored in stat data v3.5 */ err = -EINVAL; @@ -3140,16 +3140,16 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) } } - if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || - ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && + if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) || + ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) && (get_inode_sd_version(inode) == STAT_DATA_V1)) { /* stat data of format v3.5 has 16 bit uid and gid */ error = -EINVAL; goto out; } - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || + (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { struct reiserfs_transaction_handle th; int jbegin_count = 2 * diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 87d6911c659d..d7c01ef64eda 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -121,15 +121,23 @@ static struct posix_acl *posix_acl_from_disk(const void *value, size_t size) case ACL_OTHER: value = (char *)value + sizeof(reiserfs_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; break; case ACL_USER: + value = (char *)value + sizeof(reiserfs_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + break; case ACL_GROUP: value = (char *)value + sizeof(reiserfs_acl_entry); if ((char *)value > end) goto fail; - acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); + acl->a_entries[n].e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); break; default: @@ -164,13 +172,19 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); e = (char *)ext_acl + sizeof(reiserfs_acl_header); for (n = 0; n < acl->a_count; n++) { + const struct posix_acl_entry *acl_e = &acl->a_entries[n]; reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e; entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); switch (acl->a_entries[n].e_tag) { case ACL_USER: + entry->e_id = cpu_to_le32( + from_kuid(&init_user_ns, acl_e->e_uid)); + e += sizeof(reiserfs_acl_entry); + break; case ACL_GROUP: - entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + entry->e_id = cpu_to_le32( + from_kgid(&init_user_ns, acl_e->e_gid)); e += sizeof(reiserfs_acl_entry); break; diff --git a/init/Kconfig b/init/Kconfig index 20d4a1bf3281..8b11f2908c25 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -939,7 +939,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on REISERFS_FS = n depends on SQUASHFS = n depends on UBIFS_FS = n depends on UDF_FS = n -- cgit v1.2.3 From 61293ee2749bc2414725da37e50308154ff91574 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 12:14:27 -0800 Subject: userns: Convert squashfs to use kuid/kgid where appropriate Cc: Phillip Lougher Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/squashfs/inode.c | 8 ++++++-- init/Kconfig | 1 - 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 81afbccfa843..a1ce5ce60632 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -56,16 +56,20 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode, struct squashfs_base_inode *sqsh_ino) { + uid_t i_uid; + gid_t i_gid; int err; - err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &inode->i_uid); + err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid); if (err) return err; - err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &inode->i_gid); + err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &i_gid); if (err) return err; + i_uid_write(inode, i_uid); + i_gid_write(inode, i_gid); inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime); inode->i_atime.tv_sec = inode->i_mtime.tv_sec; diff --git a/init/Kconfig b/init/Kconfig index 8b11f2908c25..d88f945fa610 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -939,7 +939,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on SQUASHFS = n depends on UBIFS_FS = n depends on UDF_FS = n depends on UFS_FS = n -- cgit v1.2.3 From 39241beb78f69925b0475ad78f06f0e0589fb71b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 15:50:56 -0800 Subject: userns: Convert ubifs to use kuid/kgid Cc: Artem Bityutskiy Cc: Adrian Hunter Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/ubifs/budget.c | 4 ++-- fs/ubifs/debug.c | 4 ++-- fs/ubifs/journal.c | 4 ++-- fs/ubifs/sb.c | 4 ++-- fs/ubifs/super.c | 4 ++-- fs/ubifs/ubifs.h | 4 ++-- init/Kconfig | 1 - 7 files changed, 12 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index bc4f94b28706..969489e478bc 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -272,8 +272,8 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) */ static int can_use_rp(struct ubifs_info *c) { - if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) || - (c->rp_gid != 0 && in_group_p(c->rp_gid))) + if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) || + (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid))) return 1; return 0; } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index bb3167257aab..340d1afc1302 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -243,8 +243,8 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) printk(KERN_ERR "\tsize %llu\n", (unsigned long long)i_size_read(inode)); printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); - printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); - printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); + printk(KERN_ERR "\tuid %u\n", (unsigned int)i_uid_read(inode)); + printk(KERN_ERR "\tgid %u\n", (unsigned int)i_gid_read(inode)); printk(KERN_ERR "\tatime %u.%u\n", (unsigned int)inode->i_atime.tv_sec, (unsigned int)inode->i_atime.tv_nsec); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 12c0f154ca83..afaad07f3b29 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -469,8 +469,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ino->uid = cpu_to_le32(inode->i_uid); - ino->gid = cpu_to_le32(inode->i_gid); + ino->uid = cpu_to_le32(i_uid_read(inode)); + ino->gid = cpu_to_le32(i_gid_read(inode)); ino->mode = cpu_to_le32(inode->i_mode); ino->flags = cpu_to_le32(ui->flags); ino->size = cpu_to_le64(ui->ui_size); diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 15e2fc5aa60b..52c21f4190f6 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -611,8 +611,8 @@ int ubifs_read_superblock(struct ubifs_info *c) c->fanout = le32_to_cpu(sup->fanout); c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); c->rp_size = le64_to_cpu(sup->rp_size); - c->rp_uid = le32_to_cpu(sup->rp_uid); - c->rp_gid = le32_to_cpu(sup->rp_gid); + c->rp_uid = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid)); + c->rp_gid = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid)); sup_flags = le32_to_cpu(sup->flags); if (!c->mount_opts.override_compr) c->default_compr = le16_to_cpu(sup->default_compr); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1c766c39c038..f39bad9db61c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -130,8 +130,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) inode->i_flags |= (S_NOCMTIME | S_NOATIME); set_nlink(inode, le32_to_cpu(ino->nlink)); - inode->i_uid = le32_to_cpu(ino->uid); - inode->i_gid = le32_to_cpu(ino->gid); + i_uid_write(inode, le32_to_cpu(ino->uid)); + i_gid_write(inode, le32_to_cpu(ino->gid)); inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1e5a08623d11..64f2367c2f4c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1426,8 +1426,8 @@ struct ubifs_info { long long rp_size; long long report_rp_size; - uid_t rp_uid; - gid_t rp_gid; + kuid_t rp_uid; + kgid_t rp_gid; /* The below fields are used only during mounting and re-mounting */ unsigned int empty:1; diff --git a/init/Kconfig b/init/Kconfig index d88f945fa610..21adc1c997ad 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -939,7 +939,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on UBIFS_FS = n depends on UDF_FS = n depends on UFS_FS = n depends on XFS_FS = n -- cgit v1.2.3 From c2ba138a27ddac4abbc931599dbce907c868910a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 12:20:35 -0800 Subject: userns: Convert the udf filesystem to use kuid/kgid where appropriate Cc: Jan Kara Signed-off-by: Eric W. Biederman --- fs/udf/inode.c | 12 ++++++------ fs/udf/super.c | 20 ++++++++++++-------- fs/udf/udf_sb.h | 4 ++-- init/Kconfig | 1 - 4 files changed, 20 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/udf/inode.c b/fs/udf/inode.c index fafaad795cd6..1825dc0af728 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1309,14 +1309,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) } read_lock(&sbi->s_cred_lock); - inode->i_uid = le32_to_cpu(fe->uid); - if (inode->i_uid == -1 || + i_uid_write(inode, le32_to_cpu(fe->uid)); + if (!uid_valid(inode->i_uid) || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET)) inode->i_uid = UDF_SB(inode->i_sb)->s_uid; - inode->i_gid = le32_to_cpu(fe->gid); - if (inode->i_gid == -1 || + i_gid_write(inode, le32_to_cpu(fe->gid)); + if (!gid_valid(inode->i_gid) || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) inode->i_gid = UDF_SB(inode->i_sb)->s_gid; @@ -1539,12 +1539,12 @@ static int udf_update_inode(struct inode *inode, int do_sync) if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) fe->uid = cpu_to_le32(-1); else - fe->uid = cpu_to_le32(inode->i_uid); + fe->uid = cpu_to_le32(i_uid_read(inode)); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) fe->gid = cpu_to_le32(-1); else - fe->gid = cpu_to_le32(inode->i_gid); + fe->gid = cpu_to_le32(i_gid_read(inode)); udfperms = ((inode->i_mode & S_IRWXO)) | ((inode->i_mode & S_IRWXG) << 2) | diff --git a/fs/udf/super.c b/fs/udf/super.c index dcbf98722afc..38c705574b92 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -199,8 +199,8 @@ struct udf_options { unsigned int rootdir; unsigned int flags; umode_t umask; - gid_t gid; - uid_t uid; + kgid_t gid; + kuid_t uid; umode_t fmode; umode_t dmode; struct nls_table *nls_map; @@ -335,9 +335,9 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE)) seq_puts(seq, ",gid=ignore"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) - seq_printf(seq, ",uid=%u", sbi->s_uid); + seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid)); if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) - seq_printf(seq, ",gid=%u", sbi->s_gid); + seq_printf(seq, ",gid=%u", from_kgid(&init_user_ns, sbi->s_gid)); if (sbi->s_umask != 0) seq_printf(seq, ",umask=%ho", sbi->s_umask); if (sbi->s_fmode != UDF_INVALID_MODE) @@ -516,13 +516,17 @@ static int udf_parse_options(char *options, struct udf_options *uopt, case Opt_gid: if (match_int(args, &option)) return 0; - uopt->gid = option; + uopt->gid = make_kgid(current_user_ns(), option); + if (!gid_valid(uopt->gid)) + return 0; uopt->flags |= (1 << UDF_FLAG_GID_SET); break; case Opt_uid: if (match_int(args, &option)) return 0; - uopt->uid = option; + uopt->uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uopt->uid)) + return 0; uopt->flags |= (1 << UDF_FLAG_UID_SET); break; case Opt_umask: @@ -1931,8 +1935,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) struct udf_sb_info *sbi; uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); - uopt.uid = -1; - uopt.gid = -1; + uopt.uid = INVALID_UID; + uopt.gid = INVALID_GID; uopt.umask = 0; uopt.fmode = UDF_INVALID_MODE; uopt.dmode = UDF_INVALID_MODE; diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 42ad69ac9576..5f027227f085 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -128,8 +128,8 @@ struct udf_sb_info { /* Default permissions */ umode_t s_umask; - gid_t s_gid; - uid_t s_uid; + kgid_t s_gid; + kuid_t s_uid; umode_t s_fmode; umode_t s_dmode; /* Lock protecting consistency of above permission settings */ diff --git a/init/Kconfig b/init/Kconfig index 21adc1c997ad..6f9819ac322b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -939,7 +939,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on UDF_FS = n depends on UFS_FS = n depends on XFS_FS = n -- cgit v1.2.3 From 72235465864d84cedb2d9f26f8e1de824ee20339 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Feb 2012 12:21:22 -0800 Subject: userns: Convert the ufs filesystem to use kuid/kgid where appropriate Cc: Evgeniy Dushistov Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/ufs/inode.c | 16 ++++++++-------- init/Kconfig | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index dd7c89d8a1c1..eb6d0b7dc879 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -597,8 +597,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) /* * Linux now has 32-bit uid and gid, so we can support EFT. */ - inode->i_uid = ufs_get_inode_uid(sb, ufs_inode); - inode->i_gid = ufs_get_inode_gid(sb, ufs_inode); + i_uid_write(inode, ufs_get_inode_uid(sb, ufs_inode)); + i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode)); inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size); inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); @@ -645,8 +645,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) /* * Linux now has 32-bit uid and gid, so we can support EFT. */ - inode->i_uid = fs32_to_cpu(sb, ufs2_inode->ui_uid); - inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid); + i_uid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_uid)); + i_gid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_gid)); inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size); inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime); @@ -745,8 +745,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); - ufs_set_inode_uid(sb, ufs_inode, inode->i_uid); - ufs_set_inode_gid(sb, ufs_inode, inode->i_gid); + ufs_set_inode_uid(sb, ufs_inode, i_uid_read(inode)); + ufs_set_inode_gid(sb, ufs_inode, i_gid_read(inode)); ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); @@ -789,8 +789,8 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode) ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); - ufs_inode->ui_uid = cpu_to_fs32(sb, inode->i_uid); - ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid); + ufs_inode->ui_uid = cpu_to_fs32(sb, i_uid_read(inode)); + ufs_inode->ui_gid = cpu_to_fs32(sb, i_gid_read(inode)); ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec); diff --git a/init/Kconfig b/init/Kconfig index 6f9819ac322b..577916d8c9d8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -939,7 +939,6 @@ config UIDGID_CONVERTED depends on NFSD = n depends on NFS_FS = n depends on OCFS2_FS = n - depends on UFS_FS = n depends on XFS_FS = n config UIDGID_STRICT_TYPE_CHECKS -- cgit v1.2.3 From 84c3b84860440a9e3a3666c14112f41311b8f623 Mon Sep 17 00:00:00 2001 From: Jaeden Amero Date: Wed, 19 Sep 2012 15:34:31 +0100 Subject: compat_ioctl: Add RS-485 IOCTLs to the list The RS-485 TIOCSRS485 and TIOCGRS485 ioctls are 32-bit compatible, so in order to call them on 64-bit systems from 32-bit user mode, we add them to the ioctl pointer list as compatible ioctls. Signed-off-by: Jaeden Amero Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- fs/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809..85dfebfe6820 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -866,6 +866,8 @@ COMPATIBLE_IOCTL(TIOCGPTN) COMPATIBLE_IOCTL(TIOCSPTLCK) COMPATIBLE_IOCTL(TIOCSERGETLSR) COMPATIBLE_IOCTL(TIOCSIG) +COMPATIBLE_IOCTL(TIOCSRS485) +COMPATIBLE_IOCTL(TIOCGRS485) #ifdef TCGETS2 COMPATIBLE_IOCTL(TCGETS2) COMPATIBLE_IOCTL(TCSETS2) -- cgit v1.2.3 From 36048853c5257a7b6df346b83758ffa776a59e9f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 21 Sep 2012 02:16:29 -0700 Subject: debugfs: fix race in u32_array_read and allocate array at open u32_array_open() is racy when multiple threads read from a file with a seek position of zero, i.e. when two or more simultaneous reads are occurring after the non-seekable files are created. It is possible that file->private_data is double-freed because the threads races between kfree(file->private-data); and file->private_data = NULL; The fix is to only do format_array_alloc() when the file is opened and free it when it is closed. Note that because the file has always been non-seekable, you can't open it and read it multiple times anyway, so the data has always been generated just once. The difference is that now it is generated at open time rather than at the time of the first read, and that avoids the race. Reported-by: Dave Jones Acked-by: Konrad Rzeszutek Wilk Tested-by: Raghavendra Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- fs/debugfs/file.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 2340f6978d6e..a09d3c0aad68 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -526,12 +526,6 @@ struct array_data { u32 elements; }; -static int u32_array_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return nonseekable_open(inode, file); -} - static size_t format_array(char *buf, size_t bufsize, const char *fmt, u32 *array, u32 array_size) { @@ -573,26 +567,21 @@ static char *format_array_alloc(const char *fmt, u32 *array, return ret; } -static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, - loff_t *ppos) +static int u32_array_open(struct inode *inode, struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; struct array_data *data = inode->i_private; - size_t size; - if (*ppos == 0) { - if (file->private_data) { - kfree(file->private_data); - file->private_data = NULL; - } - - file->private_data = format_array_alloc("%u", data->array, - data->elements); - } + file->private_data = format_array_alloc("%u", data->array, + data->elements); + if (!file->private_data) + return -ENOMEM; + return nonseekable_open(inode, file); +} - size = 0; - if (file->private_data) - size = strlen(file->private_data); +static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, + loff_t *ppos) +{ + size_t size = strlen(file->private_data); return simple_read_from_buffer(buf, len, ppos, file->private_data, size); -- cgit v1.2.3 From e05e279e6fc940a2adb9d4d4bf2b814dfc286176 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Sep 2012 11:48:05 -0700 Subject: debugfs: fix u32_array race in format_array_alloc The format_array_alloc() function is fundamentally racy, in that it prints the array twice: once to figure out how much space to allocate for the buffer, and the second time to actually print out the data. If any of the array contents changes in between, the allocation size may be wrong, and the end result may be truncated in odd ways. Just don't do it. Allocate a maximum-sized array up-front, and just format the array contents once. The only user of the u32_array interfaces is the Xen spinlock statistics code, and it has 31 entries in the arrays, so the maximum size really isn't that big, and the end result is much simpler code without the bug. Signed-off-by: Linus Torvalds --- fs/debugfs/file.c | 57 ++++++++++++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index a09d3c0aad68..c5ca6ae5a30c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -526,55 +526,44 @@ struct array_data { u32 elements; }; -static size_t format_array(char *buf, size_t bufsize, const char *fmt, - u32 *array, u32 array_size) +static size_t u32_format_array(char *buf, size_t bufsize, + u32 *array, int array_size) { size_t ret = 0; - u32 i; - for (i = 0; i < array_size; i++) { + while (--array_size >= 0) { size_t len; + char term = array_size ? ' ' : '\n'; - len = snprintf(buf, bufsize, fmt, array[i]); - len++; /* ' ' or '\n' */ + len = snprintf(buf, bufsize, "%u%c", *array++, term); ret += len; - if (buf) { - buf += len; - bufsize -= len; - buf[-1] = (i == array_size-1) ? '\n' : ' '; - } + buf += len; + bufsize -= len; } - - ret++; /* \0 */ - if (buf) - *buf = '\0'; - - return ret; -} - -static char *format_array_alloc(const char *fmt, u32 *array, - u32 array_size) -{ - size_t len = format_array(NULL, 0, fmt, array, array_size); - char *ret; - - ret = kmalloc(len, GFP_KERNEL); - if (ret == NULL) - return NULL; - - format_array(ret, len, fmt, array, array_size); return ret; } static int u32_array_open(struct inode *inode, struct file *file) { struct array_data *data = inode->i_private; - - file->private_data = format_array_alloc("%u", data->array, - data->elements); - if (!file->private_data) + int size, elements = data->elements; + char *buf; + + /* + * Max size: + * - 10 digits + ' '/'\n' = 11 bytes per number + * - terminating NUL character + */ + size = elements*11; + buf = kmalloc(size+1, GFP_KERNEL); + if (!buf) return -ENOMEM; + buf[size] = 0; + + file->private_data = buf; + u32_format_array(buf, size, data->array, data->elements); + return nonseekable_open(inode, file); } -- cgit v1.2.3 From 156cacb1d0d36b0d0582d9e798e58e0044f516b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Sep 2012 08:19:02 -0400 Subject: do_add_mount()/umount -l races normally we deal with lock_mount()/umount races by checking that mountpoint to be is still in our namespace after lock_mount() has been done. However, do_add_mount() skips that check when called with MNT_SHRINKABLE in flags (i.e. from finish_automount()). The reason is that ->mnt_ns may be a temporary namespace created exactly to contain automounts a-la NFS4 referral handling. It's not the namespace of the caller, though, so check_mnt() would fail here. We still need to check that ->mnt_ns is non-NULL in that case, though. Signed-off-by: Al Viro --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 4d31f73e2561..7bdf7907413f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; err = -EINVAL; - if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) - goto unlock; + if (unlikely(!check_mnt(real_mount(path->mnt)))) { + /* that's acceptable only for automounts done in private ns */ + if (!(mnt_flags & MNT_SHRINKABLE)) + goto unlock; + /* ... and for those we'd better have mountpoint still alive */ + if (!real_mount(path->mnt)->mnt_ns) + goto unlock; + } /* Refuse the same filesystem on the same mount point */ err = -EBUSY; -- cgit v1.2.3 From c5aa1e554a20fb3542c62688ae46049c9225a965 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 29 Aug 2012 09:00:01 -0400 Subject: close the race in nlmsvc_free_block() we need to grab mutex before the reference counter reaches 0 Signed-off-by: Al Viro --- fs/lockd/svclock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fb1a2bedbe97..8d80c990dffd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref) dprintk("lockd: freeing block %p...\n", block); /* Remove block from file's list of blocks */ - mutex_lock(&file->f_mutex); list_del_init(&block->b_flist); mutex_unlock(&file->f_mutex); @@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref) static void nlmsvc_release_block(struct nlm_block *block) { if (block != NULL) - kref_put(&block->b_count, nlmsvc_free_block); + kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex); } /* -- cgit v1.2.3 From 50df9fd55e4271e89a7adf3b1172083dd0ca199d Mon Sep 17 00:00:00 2001 From: Herton Ronaldo Krzesinski Date: Sun, 23 Sep 2012 22:49:12 -0400 Subject: ext4: fix crash when accessing /proc/mounts concurrently The crash was caused by a variable being erronously declared static in token2str(). In addition to /proc/mounts, the problem can also be easily replicated by accessing /proc/fs/ext4//options in parallel: $ cat /proc/fs/ext4//options > options.txt ... and then running the following command in two different terminals: $ while diff /proc/fs/ext4//options options.txt; do true; done This is also the cause of the following a crash while running xfstests #234, as reported in the following bug reports: https://bugs.launchpad.net/bugs/1053019 https://bugzilla.kernel.org/show_bug.cgi?id=47731 Signed-off-by: Herton Ronaldo Krzesinski Signed-off-by: "Theodore Ts'o" Cc: Brad Figg Cc: stable@vger.kernel.org --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e6784b3276be..7bef0a4bc518 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1749,7 +1749,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, static const char *token2str(int token) { - static const struct match_token *t; + const struct match_token *t; for (t = tokens; t->token != Opt_err; t++) if (t->token == token && !strchr(t->pattern, '=')) -- cgit v1.2.3 From 838cd0cf9af52e034ee81513642083bbe8e4ddb1 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 23 Sep 2012 23:10:51 -0400 Subject: ext4: check free block counters in ext4_mb_find_by_goal Free block counters should be checked before doing allocation. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2c7c082b8169..bb821a924049 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1660,10 +1660,13 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, int max; int err; struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); struct ext4_free_extent ex; if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) return 0; + if (grp->bb_free == 0) + return 0; err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); if (err) -- cgit v1.2.3 From f2a09af645b762f8230e7eba7fee3b6f7e6e96e7 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 23 Sep 2012 23:16:03 -0400 Subject: ext4: check free inode count before allocating an inode Recently, I ecountered some corrupted filesystems in which some groups' free inode counts were 65535, it seemed that free inode count was overflow. This patch teaches ext4 to check free inode count before allocaing an inode. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 26154b81b836..fa36372f3fdf 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -697,6 +697,15 @@ got_group: if (!gdp) goto fail; + /* + * Check free inodes count before loading bitmap. + */ + if (ext4_free_inodes_count(sb, gdp) == 0) { + if (++group == ngroups) + group = 0; + continue; + } + brelse(inode_bitmap_bh); inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); if (!inode_bitmap_bh) -- cgit v1.2.3 From 1f981697432daea3c0abf938b39174dcfb29340e Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 26 Jul 2012 11:26:36 +0100 Subject: GFS2: Merge two nearly identical xattr functions There were two functions in the xattr code which were nearly identical, the only difference being that one was copy data into the unstuffed xattrs and the other was copying data out from it. This patch merges the two functions such that the code which deal with iteration over the unstuffed xattrs is no longer duplicated. Signed-off-by: Steven Whitehouse --- fs/gfs2/xattr.c | 94 ++++++++++++++++++--------------------------------------- 1 file changed, 30 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 27a0b4a901f5..5404ed1582ff 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) } /** - * ea_get_unstuffed - actually copies the unstuffed data into the - * request buffer + * ea_iter_unstuffed - copies the unstuffed xattr data to/from the + * request buffer * @ip: The GFS2 inode * @ea: The extended attribute header structure - * @data: The data to be copied + * @din: The data to be copied in + * @dout: The data to be copied out (one of din,dout will be NULL) * * Returns: errno */ -static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, - char *data) +static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, + const char *din, char *dout) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head **bh; @@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); unsigned int x; int error = 0; + unsigned char *pos; + unsigned cp_size; bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); if (!bh) @@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, goto out; } - memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header), - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); + pos = bh[x]->b_data + sizeof(struct gfs2_meta_header); + cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize; - amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; + if (dout) { + memcpy(dout, pos, cp_size); + dout += sdp->sd_jbsize; + } + + if (din) { + gfs2_trans_add_bh(ip->i_gl, bh[x], 1); + memcpy(pos, din, cp_size); + din += sdp->sd_jbsize; + } + amount -= sdp->sd_jbsize; brelse(bh[x]); } @@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, memcpy(data, GFS2_EA2DATA(el->el_ea), len); return len; } - ret = ea_get_unstuffed(ip, el->el_ea, data); + ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data); if (ret < 0) return ret; return len; @@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name, size, flags, type); } + static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, char *data) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head **bh; unsigned int amount = GFS2_EA_DATA_LEN(ea); unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); - __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); - unsigned int x; - int error; - - bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); - if (!bh) - return -ENOMEM; - - error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); - if (error) - goto out; - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, - bh + x); - if (error) { - while (x--) - brelse(bh[x]); - goto fail; - } - dataptrs++; - } - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_wait(sdp, bh[x]); - if (error) { - for (; x < nptrs; x++) - brelse(bh[x]); - goto fail; - } - if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { - for (; x < nptrs; x++) - brelse(bh[x]); - error = -EIO; - goto fail; - } - - gfs2_trans_add_bh(ip->i_gl, bh[x], 1); - - memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data, - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); - - amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; - - brelse(bh[x]); - } + int ret; -out: - kfree(bh); - return error; + ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); + if (ret) + return ret; -fail: + ret = gfs2_iter_unstuffed(ip, ea, data, NULL); gfs2_trans_end(sdp); - kfree(bh); - return error; + + return ret; } int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) -- cgit v1.2.3 From 71f890f7f758f340215d48fed5223f9cce05b652 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 30 Jul 2012 14:53:19 +0100 Subject: GFS2: Remove rs_requested field from reservations The rs_requested field is left over from the original allocation code, however this should have been a parameter passed to the various functions from gfs2_inplace_reserve() and not a member of the reservation structure as the value is not required after the initial allocation. This also helps simplify the code since we no longer need to set the rs_requested to zero. Also the gfs2_inplace_release() function can also be simplified since the reservation structure will always be defined when it is called, and the only remaining task is to unlock the rgrp if required. It can also now be called unconditionally too, resulting in a further simplification. Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 9 +++++---- fs/gfs2/file.c | 4 ++-- fs/gfs2/incore.h | 3 --- fs/gfs2/inode.c | 9 +++------ fs/gfs2/quota.c | 9 +++++---- fs/gfs2/rgrp.c | 35 ++++++++--------------------------- fs/gfs2/rgrp.h | 8 -------- fs/gfs2/trans.h | 7 +++---- fs/gfs2/xattr.c | 2 +- 9 files changed, 27 insertions(+), 59 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index d6526347d386..00eaa83871b7 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(mapping->host); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + unsigned requested = 0; int alloc_required; int error = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; @@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_unlock; - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); + requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, requested); if (error) goto out_qunlock; } @@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (&ip->i_inode == sdp->sd_rindex) rblocks += 2 * RES_STATFS; if (alloc_required) - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, requested); error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); @@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (gfs2_mb_reserved(ip)) - gfs2_inplace_release(ip); + gfs2_inplace_release(ip); if (ip->i_res->rs_qa_qd_num) gfs2_quota_unlock(ip); if (inode == sdp->sd_rindex) { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 382000ffac1f..30e21997a1a1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -441,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) @@ -845,7 +845,7 @@ retry: &max_bytes, &data_blocks, &ind_blocks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(ip); + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aaecc8085fc5..52078a161ecd 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -250,9 +250,6 @@ struct gfs2_blkreserv { /* components used during write (step 1): */ atomic_t rs_sizehint; /* hint of the write size */ - /* components used during inplace_reserve (step 2): */ - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - /* components used during get_local_rgrp (step 3): */ struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 753af3d86bbc..f2709ea887da 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -737,10 +737,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, brelse(bh); gfs2_trans_end(sdp); - /* Check if we reserved space in the rgrp. Function link_dinode may - not, depending on whether alloc is required. */ - if (gfs2_mb_reserved(dip)) - gfs2_inplace_release(dip); + gfs2_inplace_release(dip); gfs2_quota_unlock(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); @@ -897,7 +894,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(dip) + + gfs2_rg_blocks(dip, sdp->sd_max_dirres) + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -1378,7 +1375,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(ndip) + + gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + 4 * RES_DINODE + 4 * RES_LEAF + RES_STATFS + RES_QUOTA + 4, 0); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3bde91645c2..420bc3805ccc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) struct gfs2_holder *ghs, i_gh; unsigned int qx, x; struct gfs2_quota_data *qd; + unsigned reserved; loff_t offset; unsigned int nalloc = 0, blocks; int error; @@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) * two blocks need to be updated instead of 1 */ blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; - error = gfs2_inplace_reserve(ip, 1 + - (nalloc * (data_blocks + ind_blocks))); + reserved = 1 + (nalloc * (data_blocks + ind_blocks)); + error = gfs2_inplace_reserve(ip, reserved); if (error) goto out_alloc; if (nalloc) - blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; + blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS; error = gfs2_trans_begin(sdp, blocks, 0); if (error) @@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, error = gfs2_inplace_reserve(ip, blocks); if (error) goto out_i; - blocks += gfs2_rg_blocks(ip); + blocks += gfs2_rg_blocks(ip, blocks); } /* Some quotas span block boundaries and can update two blocks, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c9ed814eeb6f..a2b43bb83499 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -539,7 +539,6 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) E.g. We can't set rs_rgd to NULL because the rgd glock is held and dequeued through this pointer. Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_requested = 0; Can't: rs->rs_rgd = NULL;*/ rs->rs_bi = NULL; rs->rs_biblk = 0; @@ -1350,7 +1349,7 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) { struct gfs2_bitmap *bi = rgd->rd_bits; const u32 length = rgd->rd_length; @@ -1422,8 +1421,7 @@ do_search: what we can. If there's not enough, keep looking. */ if (nonzero == NULL) rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= - ip->i_res->rs_requested) + else if ((nonzero - ptr) * GFS2_NBBY >= requested) rsv_bytes = (nonzero - ptr); if (rsv_bytes) { @@ -1461,17 +1459,16 @@ skip: * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) */ -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_blkreserv *rs = ip->i_res; - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; /* Look for a multi-block reservation. */ if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip) != BFITNOENT) + rg_mblk_search(rgd, ip, requested) != BFITNOENT) return 1; - if (unclaimed_blocks(rgd) >= rs->rs_requested) + if (unclaimed_blocks(rgd) >= requested) return 1; return 0; @@ -1562,7 +1559,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { error = -EINVAL; goto out; @@ -1606,7 +1602,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) case 0: if (gfs2_rs_active(rs)) { if (unclaimed_blocks(rs->rs_rgd) + - rs->rs_free >= rs->rs_requested) { + rs->rs_free >= requested) { ip->i_rgd = rs->rs_rgd; return 0; } @@ -1616,7 +1612,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) and look for a suitable rgrp. */ gfs2_rs_deltree(rs); } - if (try_rgrp_fit(rs->rs_rgd, ip)) { + if (try_rgrp_fit(rs->rs_rgd, ip, requested)) { if (sdp->sd_args.ar_rgrplvb) gfs2_rgrp_bh_get(rs->rs_rgd); ip->i_rgd = rs->rs_rgd; @@ -1656,8 +1652,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) error = -ENOSPC; out: - if (error) - rs->rs_requested = 0; return error; } @@ -1672,15 +1666,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; - if (!rs) - return; - - if (!rs->rs_free) - gfs2_rs_deltree(rs); - if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - rs->rs_requested = 0; } /** @@ -2021,12 +2008,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, int error; struct gfs2_bitmap *bi; - /* Only happens if there is a bug in gfs2, return something distinctive - * to ensure that it is noticed. - */ - if (ip->i_res->rs_requested == 0) - return -ECANCELED; - /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index ca6e26729b86..0b0e9cc7e3d9 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -73,14 +73,6 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); -/* This is how to tell if a multi-block reservation is "inplace" reserved: */ -static inline int gfs2_mb_reserved(struct gfs2_inode *ip) -{ - if (ip->i_res && ip->i_res->rs_requested) - return 1; - return 0; -} - /* This is how to tell if a multi-block reservation is in the rgrp tree: */ static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) { diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 41f42cdccbb8..bf2ae9aeee7a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -28,11 +28,10 @@ struct gfs2_glock; /* reserve either the number of blocks to be allocated plus the rg header * block, or all of the blocks in the rg, whichever is smaller */ -static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) +static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) { - const struct gfs2_blkreserv *rs = ip->i_res; - if (rs && rs->rs_requested < ip->i_rgd->rd_length) - return rs->rs_requested + 1; + if (requested < ip->i_rgd->rd_length) + return requested + 1; return ip->i_rgd->rd_length; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 5404ed1582ff..db330e5518cd 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -739,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out_gunlock_q; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), - blks + gfs2_rg_blocks(ip) + + blks + gfs2_rg_blocks(ip, blks) + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; -- cgit v1.2.3 From 4a993fb1503d11496974bd86c0b7123f63d9c8a2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Jul 2012 15:21:20 +0100 Subject: GFS2: Add structure to contain rgrp, bitmap, offset tuple This patch introduces a new structure, gfs2_rbm, which is a tuple of a resource group, a bitmap within the resource group and an offset within that bitmap. This is designed to make manipulating these sets of variables easier. There is also a new helper function which converts this representation back to a disk block address. In addition, the rbtree nodes which are used for the reservations were not being correctly initialised, which is now fixed. Also, the tracing was not passing through the inode where it should have been. That is mostly fixed aside from one corner case. This needs to be revisited since there can also be a NULL rgrp in some cases which results in the device being incorrect in the trace. This is intended to be the first step towards cleaning up some of the allocation code, and some further bug fixes. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- fs/gfs2/incore.h | 15 ++++- fs/gfs2/rgrp.c | 175 ++++++++++++++++++++++++--------------------------- fs/gfs2/rgrp.h | 16 ++--- fs/gfs2/super.c | 2 +- fs/gfs2/trace_gfs2.h | 10 +-- 6 files changed, 105 insertions(+), 115 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49cd7dd4a9fa..1fd3ae237bdd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, goto out_rlist; if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 52078a161ecd..d5e254604c72 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -102,6 +102,17 @@ struct gfs2_rgrpd { u32 rd_rs_cnt; /* count of current reservations */ }; +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ + u32 offset; /* The offset is bitmap relative */ +}; + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; +} + enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, @@ -251,13 +262,11 @@ struct gfs2_blkreserv { atomic_t rs_sizehint; /* hint of the write size */ /* components used during get_local_rgrp (step 3): */ - struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ + struct gfs2_rbm rs_rbm; struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ struct rb_node rs_node; /* link to other block reservations */ /* components used during block searches and assignments (step 4): */ - struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ - u32 rs_biblk; /* start block relative to the bi */ u32 rs_free; /* how many blocks are still free */ /* ancillary quota stuff */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index a2b43bb83499..eaa41885a00d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -192,7 +192,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rs_startblk(rs); + u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); if (blk >= startblk + rs->rs_free) return 1; @@ -487,6 +487,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; + rb_init_node(&res->rs_node); + down_write(&ip->i_rw_mutex); if (ip->i_res) kmem_cache_free(gfs2_rsrv_cachep, res); @@ -499,8 +501,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) { gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, - rs->rs_free); + rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.offset, rs->rs_free); } /** @@ -508,40 +510,28 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -static void __rs_deltree(struct gfs2_blkreserv *rs) +static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rgd; - /* We can't do this: The reason is that when the rgrp is invalidated, - it's in the "middle" of acquiring the glock, but the HOLDER bit - isn't set yet: - BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ - trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); - - if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) - rb_erase(&rs->rs_node, &rgd->rd_rstree); + rgd = rs->rs_rbm.rgd; + trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); + rb_erase(&rs->rs_node, &rgd->rd_rstree); + rb_init_node(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); rgd->rd_rs_cnt--; if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); + rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); smp_mb__after_clear_bit(); } - /* We can't change any of the step 1 or step 2 components of the rs. - E.g. We can't set rs_rgd to NULL because the rgd glock is held and - dequeued through this pointer. - Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_rgd = NULL;*/ - rs->rs_bi = NULL; - rs->rs_biblk = 0; } /** @@ -549,17 +539,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - if (!gfs2_rs_active(rs)) - return; - - rgd = rs->rs_rgd; - spin_lock(&rgd->rd_rsspin); - __rs_deltree(rs); - spin_unlock(&rgd->rd_rsspin); + rgd = rs->rs_rbm.rgd; + if (rgd) { + spin_lock(&rgd->rd_rsspin); + __rs_deltree(ip, rs); + spin_unlock(&rgd->rd_rsspin); + } } /** @@ -571,7 +560,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); @@ -596,7 +585,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) spin_lock(&rgd->rd_rsspin); while ((n = rb_first(&rgd->rd_rstree))) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - __rs_deltree(rs); + __rs_deltree(NULL, rs); } spin_unlock(&rgd->rd_rsspin); } @@ -1284,7 +1273,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; spin_lock(&rgd->rd_rsspin); @@ -1312,8 +1301,8 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, /* Do our reservation work */ rs = ip->i_res; rs->rs_free = amount; - rs->rs_biblk = biblk; - rs->rs_bi = bi; + rs->rs_rbm.offset = biblk; + rs->rs_rbm.bi = bi; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); @@ -1564,34 +1553,34 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) goto out; } if (gfs2_rs_active(rs)) { - begin = rs->rs_rgd; + begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { - rs->rs_rgd = begin = ip->i_rgd; + rs->rs_rbm.rgd = begin = ip->i_rgd; } else { - rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (rs->rs_rgd == NULL) + if (rs->rs_rbm.rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 0; - if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { + if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { rg_locked = 1; error = 0; } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { + rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { /* If the rgrp already is maxed out for contenders, we can eliminate it as a "first pass" without even requesting the rgrp glock. */ error = GLR_TRYFAILED; } else { - error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); if (!error && sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); + error = update_rgrp_lvb(rs->rs_rbm.rgd); if (error) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; @@ -1601,36 +1590,36 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) switch (error) { case 0: if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rgd) + + if (unclaimed_blocks(rs->rs_rbm.rgd) + rs->rs_free >= requested) { - ip->i_rgd = rs->rs_rgd; + ip->i_rgd = rs->rs_rbm.rgd; return 0; } /* We have a multi-block reservation, but the rgrp doesn't have enough free blocks to satisfy the request. Free the reservation and look for a suitable rgrp. */ - gfs2_rs_deltree(rs); + gfs2_rs_deltree(ip, rs); } - if (try_rgrp_fit(rs->rs_rgd, ip, requested)) { + if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - ip->i_rgd = rs->rs_rgd; + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + ip->i_rgd = rs->rs_rbm.rgd; return 0; } - if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - try_rgrp_unlink(rs->rs_rgd, &last_unlinked, + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, ip->i_no_addr); } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: - rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); - rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rgd != begin) /* If we didn't wrap */ + rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd); + rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */ + if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */ break; flags &= ~LM_FLAG_TRY; @@ -1776,11 +1765,11 @@ do_search: if (rs == NULL) break; - BUG_ON(rs->rs_bi != bi); + BUG_ON(rs->rs_rbm.bi != bi); biblk = BFITNOENT; /* This should jump to the first block after the reservation. */ - goal = rs->rs_biblk + rs->rs_free; + goal = rs->rs_rbm.offset + rs->rs_free; if (goal >= bi->bi_len * GFS2_NBBY) break; } @@ -1805,9 +1794,7 @@ skip: /** * gfs2_alloc_extent - allocate an extent from a given bitmap - * @rgd: the resource group descriptor - * @bi: the bitmap within the rgrp - * @blk: the block within the bitmap + * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode * @n: The extent length * @@ -1815,9 +1802,12 @@ skip: * Set the found bits to @new_state to change block's allocation state. * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, - u32 blk, bool dinode, unsigned int *n) +static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, + unsigned int *n) { + struct gfs2_rgrpd *rgd = rbm->rgd; + struct gfs2_bitmap *bi = rbm->bi; + u32 blk = rbm->offset; const unsigned int elen = *n; u32 goal, rgblk; const u8 *buffer = NULL; @@ -1956,21 +1946,21 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, unsigned int *nblocks) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; struct gfs2_bitmap *bi; - u64 start_block = gfs2_rs_startblk(rs); + u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm); const unsigned int elen = *nblocks; - bi = rs->rs_bi; + bi = rs->rs_rbm.bi; gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE) + bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_biblk++; + rs->rs_rbm.offset++; rs->rs_free--; BUG_ON(!rgd->rd_reserved); @@ -1980,7 +1970,7 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); if (!rs->rs_free || *nblocks != elen) - gfs2_rs_deltree(rs); + gfs2_rs_deltree(ip, rs); return start_block; } @@ -2001,40 +1991,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rgrpd *rgd; + struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal, blk; /* block, within the rgrp scope */ + u32 goal; /* block, within the rgrp scope */ u64 block; /* block, within the file system scope */ int error; - struct gfs2_bitmap *bi; /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); - rgd = ip->i_res->rs_rgd; + rbm.rgd = ip->i_res->rs_rbm.rgd; block = claim_reserved_blks(ip, dinode, nblocks); if (*nblocks) goto found_blocks; } - rgd = ip->i_rgd; - - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal - rbm.rgd->rd_data0; else - goal = rgd->rd_last_alloc; + goal = rbm.rgd->rd_last_alloc; - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi); /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) { + if (rbm.offset == BFITNOENT) { printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + block = gfs2_alloc_extent(&rbm, dinode, nblocks); found_blocks: ndata = *nblocks; if (dinode) @@ -2052,22 +2039,22 @@ found_blocks: brelse(dibh); } } - if (rgd->rd_free < *nblocks) { + if (rbm.rgd->rd_free < *nblocks) { printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; } - rgd->rd_free -= *nblocks; + rbm.rgd->rd_free -= *nblocks; if (dinode) { - rgd->rd_dinodes++; - *generation = rgd->rd_igeneration++; + rbm.rgd->rd_dinodes++; + *generation = rbm.rgd->rd_igeneration++; if (*generation == 0) - *generation = rgd->rd_igeneration++; + *generation = rbm.rgd->rd_igeneration++; } - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); - gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); + gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2081,14 +2068,14 @@ found_blocks: gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, ip->i_inode.i_gid); - rgd->rd_free_clone -= *nblocks; - trace_gfs2_block_alloc(ip, rgd, block, *nblocks, + rbm.rgd->rd_free_clone -= *nblocks; + trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; rgrp_error: - gfs2_rgrp_error(rgd); + gfs2_rgrp_error(rbm.rgd); return -EIO; } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 0b0e9cc7e3d9..c98f6af07e1c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern int gfs2_rs_alloc(struct gfs2_inode *ip); -extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs); extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); @@ -73,22 +73,16 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); -/* This is how to tell if a multi-block reservation is in the rgrp tree: */ -static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) +/* This is how to tell if a reservation is in the rgrp tree: */ +static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) { - if (rs && rs->rs_bi) - return 1; - return 0; + return rs && !RB_EMPTY_NODE(&rs->rs_node); } + static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) { return (bi->bi_start * GFS2_NBBY) + blk; } -static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) -{ - return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; -} - #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fc3168f47a14..3cbac6803038 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1557,7 +1557,7 @@ out_truncate: out_unlock: /* Error path for case 1 */ if (gfs2_rs_active(ip->i_res)) - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index a25c252fe412..b947aa4dfca4 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -526,12 +526,12 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; - __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; - __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; - __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; + __entry->dev = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev : 0; + __entry->rd_addr = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_addr : 0; + __entry->rd_free_clone = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_free_clone : 0; + __entry->rd_reserved = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_reserved : 0; __entry->inum = ip ? ip->i_no_addr : 0; - __entry->start = gfs2_rs_startblk(rs); + __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); __entry->free = rs->rs_free; __entry->func = func; ), -- cgit v1.2.3 From 5b924ae2dcb1cc5e78445a0cedb5a3673bb5ad8a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Aug 2012 20:35:05 +0100 Subject: GFS2: Replace rgblk_search with gfs2_rbm_find This is part of a series of patches which are introducing the gfs2_rbm structure throughout the block allocation code. The main aim of this part is to create a search function which can deal directly with struct gfs2_rbm. In this case it specifies the initial position at which to start the search and also the point at which the search terminates. The net result of this is to clean up the search code and make it rather more readable, and the various possible exceptions which may occur during the search are partitioned into their own functions. There are some bug fixes too. We should not be checking the reservations while allocating extents - the time for that is when we are searching for where to put the extent, not when we've already made that decision. Also, rgblk_search had two uses, and in only one of those cases did it make sense to check for reservations. This is fixed in the new gfs2_rbm_find function, which has a cleaner interface. The reservation checking has been improved by always checking for contiguous reservations, and returning the first free block after all contiguous reservations. This is done under the spin lock to ensure consistancy of the tree. The allocation of extents is now in all cases done by the existing allocation code, and if there is an active reservation, that is updated after the fact. Again this is done under the spin lock, since it entails changing the lookup key for the reservation in question. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 7 + fs/gfs2/rgrp.c | 460 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 257 insertions(+), 210 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d5e254604c72..99d7c64b5091 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -113,6 +113,13 @@ static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; } +static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, + const struct gfs2_rbm *rbm2) +{ + return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + (rbm1->offset == rbm2->offset); +} + enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index eaa41885a00d..bd3b926949d0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -67,10 +67,6 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, - struct gfs2_bitmap **rbi); - /** * gfs2_setbit - Set a bit in the bitmaps * @rgd: the resource group descriptor @@ -201,36 +197,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) return 0; } -/** - * rs_find - Find a rgrp multi-block reservation that contains a given block - * @rgd: The rgrp - * @rgblk: The block we're looking for, relative to the rgrp - */ -static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) -{ - struct rb_node **newn; - int rc; - u64 fsblk = rgblk + rgd->rd_data0; - - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - while (*newn) { - struct gfs2_blkreserv *cur = - rb_entry(*newn, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(fsblk, 1, cur); - if (rc < 0) - newn = &((*newn)->rb_left); - else if (rc > 0) - newn = &((*newn)->rb_right); - else { - spin_unlock(&rgd->rd_rsspin); - return cur; - } - } - spin_unlock(&rgd->rd_rsspin); - return NULL; -} - /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. @@ -1306,9 +1272,6 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); - /* Do our inode accounting for the reservation */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - /* Do our rgrp accounting for the reservation */ rgd->rd_reserved += amount; /* blocks reserved */ rgd->rd_rs_cnt++; /* number of in-tree reservations */ @@ -1463,6 +1426,199 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, return 0; } +/** + * gfs2_next_unreserved_block - Return next block that is not reserved + * @rgd: The resource group + * @block: The starting block + * @ip: Ignore any reservations for this inode + * + * If the block does not appear in any reservation, then return the + * block number unchanged. If it does appear in the reservation, then + * keep looking through the tree of reservations in order to find the + * first block number which is not reserved. + */ + +static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + const struct gfs2_inode *ip) +{ + struct gfs2_blkreserv *rs; + struct rb_node *n; + int rc; + + spin_lock(&rgd->rd_rsspin); + n = rb_first(&rgd->rd_rstree); + while (n) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(block, 1, rs); + if (rc < 0) + n = n->rb_left; + else if (rc > 0) + n = n->rb_right; + else + break; + } + + if (n) { + while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) { + block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + n = rb_next(&rs->rs_node); + if (n == NULL) + break; + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + } + } + + spin_unlock(&rgd->rd_rsspin); + return block; +} + +/** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + return 0; + } + } + + return -E2BIG; +} + +/** + * gfs2_reservation_check_and_update - Check for reservations during block alloc + * @rbm: The current position in the resource group + * + * This checks the current position in the rgrp to see whether there is + * a reservation covering this block. If not then this function is a + * no-op. If there is, then the position is moved to the end of the + * contiguous reservation(s) so that we are pointing at the first + * non-reserved block. + * + * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error + */ + +static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip) +{ + u64 block = gfs2_rbm_to_block(rbm); + u64 nblock; + int ret; + + nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip); + if (nblock == block) + return 0; + ret = gfs2_rbm_from_block(rbm, nblock); + if (ret < 0) + return ret; + return 1; +} + +/** + * gfs2_rbm_find - Look for blocks of a particular state + * @rbm: Value/result starting position and final position + * @state: The state which we want to find + * @ip: If set, check for reservations + * @nowrap: Stop looking at the end of the rgrp, rather than wrapping + * around until we've reached the starting point. + * + * Side effects: + * - If looking for free blocks, we set GBF_FULL on each bitmap which + * has no free blocks in it. + * + * Returns: 0 on success, -ENOSPC if there is no block of the requested state + */ + +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, + const struct gfs2_inode *ip, bool nowrap) +{ + struct buffer_head *bh; + struct gfs2_bitmap *initial_bi; + u32 initial_offset; + u32 offset; + u8 *buffer; + int index; + int n = 0; + int iters = rbm->rgd->rd_length; + int ret; + + /* If we are not starting at the beginning of a bitmap, then we + * need to add one to the bitmap count to ensure that we search + * the starting bitmap twice. + */ + if (rbm->offset != 0) + iters++; + + while(1) { + if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + (state == GFS2_BLKST_FREE)) + goto next_bitmap; + + bh = rbm->bi->bi_bh; + buffer = bh->b_data + rbm->bi->bi_offset; + WARN_ON(!buffer_uptodate(bh)); + if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) + buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; +find_next: + initial_offset = rbm->offset; + offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + if (offset == BFITNOENT) + goto bitmap_full; + rbm->offset = offset; + if (ip == NULL) + return 0; + + initial_bi = rbm->bi; + ret = gfs2_reservation_check_and_update(rbm, ip); + if (ret == 0) + return 0; + if (ret > 0) { + n += (rbm->bi - initial_bi); + goto find_next; + } + return ret; + +bitmap_full: /* Mark bitmap as full and fall through */ + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) + set_bit(GBF_FULL, &rbm->bi->bi_flags); + +next_bitmap: /* Find next bitmap in the rgrp */ + rbm->offset = 0; + index = rbm->bi - rbm->rgd->rd_bits; + index++; + if (index == rbm->rgd->rd_length) + index = 0; + rbm->bi = &rbm->rgd->rd_bits[index]; + if ((index == 0) && nowrap) + break; + n++; + if (n >= iters) + break; + } + + return -ENOSPC; +} + /** * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes * @rgd: The rgrp @@ -1475,34 +1631,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) { - u32 goal = 0, block; - u64 no_addr; + u64 block; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl; struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_bitmap *bi; + struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; - while (goal < rgd->rd_data) { + while (1) { down_write(&sdp->sd_log_flush_lock); - block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true); up_write(&sdp->sd_log_flush_lock); - if (block == BFITNOENT) + if (error == -ENOSPC) + break; + if (WARN_ON_ONCE(error)) break; - block = gfs2_bi2rgd_blk(bi, block); - /* rgblk_search can return a block < goal, so we need to - keep it marching forward. */ - no_addr = block + rgd->rd_data0; - goal = max(block + 1, goal + 1); - if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) + block = gfs2_rbm_to_block(&rbm); + if (gfs2_rbm_from_block(&rbm, block + 1)) + break; + if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) continue; - if (no_addr == skip) + if (block == skip) continue; - *last_unlinked = no_addr; + *last_unlinked = block; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); + error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); if (error) continue; @@ -1692,105 +1847,6 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) return type; } -/** - * rgblk_search - find a block in @state - * @rgd: the resource group descriptor - * @goal: the goal block within the RG (start here to search for avail block) - * @state: GFS2_BLKST_XXX the before-allocation state to find - * @rbi: address of the pointer to the bitmap containing the block found - * - * Walk rgrp's bitmap to find bits that represent a block in @state. - * - * This function never fails, because we wouldn't call it unless we - * know (from reservation results, etc.) that a block is available. - * - * Scope of @goal is just within rgrp, not the whole filesystem. - * Scope of @returned block is just within bitmap, not the whole filesystem. - * - * Returns: the block number found relative to the bitmap rbi - */ - -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, - struct gfs2_bitmap **rbi) -{ - struct gfs2_bitmap *bi = NULL; - const u32 length = rgd->rd_length; - u32 biblk = BFITNOENT; - unsigned int buf, x; - const u8 *buffer = NULL; - - *rbi = NULL; - /* Find bitmap block that contains bits for goal block */ - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; - -do_search: - /* Search (up to entire) bitmap in this rgrp for allocatable block. - "x <= length", instead of "x < length", because we typically start - the search in the middle of a bit block, but if we can't find an - allocatable block anywhere else, we want to be able wrap around and - search in the first part of our first-searched bit block. */ - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags) && - (state == GFS2_BLKST_FREE)) - goto skip; - - /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone - bitmaps, so we must search the originals for that. */ - buffer = bi->bi_bh->b_data + bi->bi_offset; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - - while (1) { - struct gfs2_blkreserv *rs; - u32 rgblk; - - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); - if (biblk == BFITNOENT) - break; - /* Check if this block is reserved() */ - rgblk = gfs2_bi2rgd_blk(bi, biblk); - rs = rs_find(rgd, rgblk); - if (rs == NULL) - break; - - BUG_ON(rs->rs_rbm.bi != bi); - biblk = BFITNOENT; - /* This should jump to the first block after the - reservation. */ - goal = rs->rs_rbm.offset + rs->rs_free; - if (goal >= bi->bi_len * GFS2_NBBY) - break; - } - if (biblk != BFITNOENT) - break; - - if ((goal == 0) && (state == GFS2_BLKST_FREE)) - set_bit(GBF_FULL, &bi->bi_flags); - - /* Try next bitmap block (wrap back to rgrp header if at end) */ -skip: - buf++; - buf %= length; - goal = 0; - } - - if (biblk != BFITNOENT) - *rbi = bi; - - return biblk; -} /** * gfs2_alloc_extent - allocate an extent from a given bitmap @@ -1809,9 +1865,8 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, struct gfs2_bitmap *bi = rbm->bi; u32 blk = rbm->offset; const unsigned int elen = *n; - u32 goal, rgblk; + u32 goal; const u8 *buffer = NULL; - struct gfs2_blkreserv *rs; *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1824,10 +1879,6 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, goal++; if (goal >= (bi->bi_len * GFS2_NBBY)) break; - rgblk = gfs2_bi2rgd_blk(bi, goal); - rs = rs_find(rgd, rgblk); - if (rs) /* Oops, we bumped into someone's reservation */ - break; if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; @@ -1933,46 +1984,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) } /** - * claim_reserved_blks - Claim previously reserved blocks - * @ip: the inode that's claiming the reservation - * @dinode: 1 if this block is a dinode block, otherwise data block - * @nblocks: desired extent length + * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation + * @ip: The inode we have just allocated blocks for + * @rbm: The start of the allocated blocks + * @len: The extent length * - * Lay claim to previously reserved blocks. - * Returns: Starting block number of the blocks claimed. - * Sets *nblocks to the actual extent length allocated. + * Adjusts a reservation after an allocation has taken place. If the + * reservation does not match the allocation, or if it is now empty + * then it is removed. */ -static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, - unsigned int *nblocks) + +static void gfs2_adjust_reservation(struct gfs2_inode *ip, + const struct gfs2_rbm *rbm, unsigned len) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - struct gfs2_bitmap *bi; - u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm); - const unsigned int elen = *nblocks; - - bi = rs->rs_rbm.bi; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + struct gfs2_rgrpd *rgd = rbm->rgd; + unsigned rlen; + u64 block; + int ret; - for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { - if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_rbm.offset++; - rs->rs_free--; - - BUG_ON(!rgd->rd_reserved); - rgd->rd_reserved--; - dinode = false; + spin_lock(&rgd->rd_rsspin); + if (gfs2_rs_active(rs)) { + if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { + block = gfs2_rbm_to_block(rbm); + ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); + rlen = min(rs->rs_free, len); + rs->rs_free -= rlen; + rgd->rd_reserved -= rlen; + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + if (rs->rs_free && !ret) + goto out; + } + __rs_deltree(ip, rs); } - - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); - if (!rs->rs_free || *nblocks != elen) - gfs2_rs_deltree(ip, rs); - - return start_block; +out: + spin_unlock(&rgd->rd_rsspin); } /** @@ -1993,36 +2039,30 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal; /* block, within the rgrp scope */ + u64 goal; u64 block; /* block, within the file system scope */ int error; - /* If we have a reservation, claim blocks from it. */ - if (gfs2_rs_active(ip->i_res)) { - BUG_ON(!ip->i_res->rs_free); - rbm.rgd = ip->i_res->rs_rbm.rgd; - block = claim_reserved_blks(ip, dinode, nblocks); - if (*nblocks) - goto found_blocks; - } - - if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) - goal = ip->i_goal - rbm.rgd->rd_data0; + if (gfs2_rs_active(ip->i_res)) + goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); + else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal; else - goal = rbm.rgd->rd_last_alloc; + goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi); + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); /* Since all blocks are reserved in advance, this shouldn't happen */ - if (rbm.offset == BFITNOENT) { - printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); - printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); + if (error) { + fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } block = gfs2_alloc_extent(&rbm, dinode, nblocks); -found_blocks: + if (gfs2_rs_active(ip->i_res)) + gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; if (dinode) ndata--; -- cgit v1.2.3 From 3983903a712e74548fa08ef25d68e55b8e4349c6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 3 Aug 2012 11:10:30 +0100 Subject: GFS2: Update gfs2_get_block_type() to use rbm Use the new gfs2_rbm_from_block() function to replace an open coded version of the same code. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bd3b926949d0..0c1be38f8370 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1824,27 +1824,14 @@ void gfs2_inplace_release(struct gfs2_inode *ip) static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) { - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_block, buf_block; - unsigned int buf; - unsigned char type; - - length = rgd->rd_length; - rgrp_block = block - rgd->rd_data0; - - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; - } - - gfs2_assert(rgd->rd_sbd, buf < length); - buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; + struct gfs2_rbm rbm = { .rgd = rgd, }; + int ret; - type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, buf_block); + ret = gfs2_rbm_from_block(&rbm, block); + WARN_ON_ONCE(ret != 0); - return type; + return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len, rbm.offset); } -- cgit v1.2.3 From 3b1d0b9d0b6f4b76293c8f30cc95aa946bd34150 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 3 Aug 2012 11:23:28 +0100 Subject: GFS2: Update rgblk_free() to use rbm Replace open coded version with a call to gfs2_rbm_from_block() Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0c1be38f8370..06476b34a1b9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1890,46 +1890,30 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { - struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_blk, buf_blk; - unsigned int buf; + struct gfs2_rbm rbm; - rgd = gfs2_blk2rgrpd(sdp, bstart, 1); - if (!rgd) { + rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); + if (!rbm.rgd) { if (gfs2_consist(sdp)) fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); return NULL; } - length = rgd->rd_length; - - rgrp_blk = bstart - rgd->rd_data0; - while (blen--) { - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + gfs2_rbm_from_block(&rbm, bstart); + bstart++; + if (!rbm.bi->bi_clone) { + rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, + rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len); } - - gfs2_assert(rgd->rd_sbd, buf < length); - - buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; - rgrp_blk++; - - if (!bi->bi_clone) { - bi->bi_clone = kmalloc(bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(bi->bi_clone + bi->bi_offset, - bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len); - } - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); + gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state); } - return rgd; + return rbm.rgd; } /** -- cgit v1.2.3 From 24d634e8f3b43fe2eb7c7d66567de7aba8edc308 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Sun, 5 Aug 2012 22:04:08 -0700 Subject: GFS2: Use RB_CLEAR_NODE() rather than rb_init_node() gfs2 calls RB_EMPTY_NODE() to check if nodes are not on an rbtree. The corresponding initialization function is RB_CLEAR_NODE(). rb_init_node() was never clearly defined and is going away. Signed-off-by: Michel Lespinasse Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 06476b34a1b9..7ce22d8c489b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -453,7 +453,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; - rb_init_node(&res->rs_node); + RB_CLEAR_NODE(&res->rs_node); down_write(&ip->i_rw_mutex); if (ip->i_res) @@ -486,7 +486,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) rgd = rs->rs_rbm.rgd; trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); - rb_init_node(&rs->rs_node); + RB_CLEAR_NODE(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); rgd->rd_rs_cnt--; -- cgit v1.2.3 From 5d50d5324612d28c47b9361e5424f13a19c888cd Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Aug 2012 13:47:12 +0100 Subject: GFS2: Fix case where reservation finished at end of rgrp One corner case which the original patch failed to take into account was when there is a reservation which ended such that the following block was one beyond the end of the rgrp in question. This extra test fixes that case. Signed-off-by: Steven Whitehouse Reported-by: Bob Peterson Tested-by: Bob Peterson --- fs/gfs2/rgrp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7ce22d8c489b..c17029a92b8f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1596,6 +1596,12 @@ find_next: n += (rbm->bi - initial_bi); goto find_next; } + if (ret == -E2BIG) { + index = 0; + rbm->offset = 0; + n += (rbm->bi - initial_bi); + goto res_covered_end_of_rgrp; + } return ret; bitmap_full: /* Mark bitmap as full and fall through */ @@ -1608,6 +1614,7 @@ next_bitmap: /* Find next bitmap in the rgrp */ index++; if (index == rbm->rgd->rd_length) index = 0; +res_covered_end_of_rgrp: rbm->bi = &rbm->rgd->rd_bits[index]; if ((index == 0) && nowrap) break; -- cgit v1.2.3 From 8d8b752a0f55282498b918f6a28f87ee2bcf19c3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 7 Aug 2012 13:28:17 -0400 Subject: GFS2: rbm code cleanup This patch fixes a few small rbm related things. First, it fixes a corner case where the rbm needs to switch bitmaps and wasn't adjusting its buffer pointer. Second, there's a white space issue fixed. Third, the logic in function gfs2_rbm_from_block was optimized a bit. Lastly, a check for goal block overflows was added to function gfs2_alloc_blocks. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c17029a92b8f..c26711882a6b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -467,7 +467,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) { gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -1493,16 +1493,18 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; for (x = 0; x < rbm->rgd->rd_length; x++) { rbm->bi = rbm->rgd->rd_bits + x; if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - return 0; + break; } } - return -E2BIG; + return 0; } /** @@ -1579,7 +1581,6 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, WARN_ON(!buffer_uptodate(bh)); if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; -find_next: initial_offset = rbm->offset; offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); if (offset == BFITNOENT) @@ -1594,7 +1595,7 @@ find_next: return 0; if (ret > 0) { n += (rbm->bi - initial_bi); - goto find_next; + goto next_iter; } if (ret == -E2BIG) { index = 0; @@ -1619,6 +1620,7 @@ res_covered_end_of_rgrp: if ((index == 0) && nowrap) break; n++; +next_iter: if (n >= iters) break; } @@ -2028,6 +2030,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, else goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; + if ((goal < rbm.rgd->rd_data0) || + (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data)) + rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1); + gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); -- cgit v1.2.3 From 8e711e100fd63b9cbf095030d45fd5ee2fa4c995 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:42 -0500 Subject: GFS2: change function gfs2_direct_IO to use a normal gfs2_glock_dq This patch changes function gfs2_direct_IO so that it uses a normal call to gfs2_glock_dq rather than a call to a multiple-dq of one item. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 00eaa83871b7..01c4975da4bc 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1024,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); out: - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return rv; } -- cgit v1.2.3 From 4abb6ad9eae0aebfcec4f188a408447cc4ea1cb4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:43 -0500 Subject: GFS2: inline __gfs2_glock_schedule_for_reclaim Since function gfs2_glock_schedule_for_reclaim is only two significant lines, we can eliminate it, simplifying the code and making it more readable. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1ed81f40da0d..67f3e42d4bd2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -185,20 +185,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) spin_unlock(&lru_lock); } -/** - * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - * If the glock is demotable, then we add it (or move it) to the end - * of the glock LRU list. - */ - -static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) -{ - if (demote_ok(gl)) - gfs2_glock_add_to_lru(gl); -} - /** * gfs2_glock_put_nolock() - Decrement reference count on glock * @gl: The glock to put @@ -1121,8 +1107,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) - __gfs2_glock_schedule_for_reclaim(gl); + if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) + gfs2_glock_add_to_lru(gl); + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) -- cgit v1.2.3 From 07a790494260e102eb42840537af82e84d2ab766 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:44 -0500 Subject: GFS2: Combine functions gfs2_glock_wait and wait_on_holder Function gfs2_glock_wait only called function wait_on_holder and returned its return code, so they were combined for readability. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 67f3e42d4bd2..5c8790960735 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -869,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word) return 0; } -static void wait_on_holder(struct gfs2_holder *gh) +/** + * gfs2_glock_wait - wait on a glock acquisition + * @gh: the glock holder + * + * Returns: 0 on success + */ + +int gfs2_glock_wait(struct gfs2_holder *gh) { unsigned long time1 = jiffies; @@ -880,6 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh) gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + GL_GLOCK_HOLD_INCR, GL_GLOCK_MAX_HOLD); + return gh->gh_error; } static void wait_on_demote(struct gfs2_glock *gl) @@ -915,19 +923,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, trace_gfs2_demote_rq(gl); } -/** - * gfs2_glock_wait - wait on a glock acquisition - * @gh: the glock holder - * - * Returns: 0 on success - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - wait_on_holder(gh); - return gh->gh_error; -} - void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { struct va_format vaf; -- cgit v1.2.3 From 81e1d45061d09f296d7664af70d3334840d123a0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:45 -0500 Subject: GFS2: Combine functions gfs2_glock_dq_wait and wait_on_demote Function gfs2_glock_dq_wait called two-line function wait_on_demote, so they were combined. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5c8790960735..fca6a873dcf3 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -890,12 +890,6 @@ int gfs2_glock_wait(struct gfs2_holder *gh) return gh->gh_error; } -static void wait_on_demote(struct gfs2_glock *gl) -{ - might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); -} - /** * handle_callback - process a demote request * @gl: the glock @@ -1123,7 +1117,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); - wait_on_demote(gl); + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** -- cgit v1.2.3 From e5dc76b9afcfb936818261d65f6f9e1f2c346d59 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:46 -0500 Subject: GFS2: Eliminate redundant calls to may_grant Function add_to_queue was checking may_grant for the passed-in holder for every iteration of its gh2 loop. Now it only checks it once at the beginning to see if a try lock is futile. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index fca6a873dcf3..e6c2fd53cab2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -954,7 +954,7 @@ __acquires(&gl->gl_spin) struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; - int try_lock = 0; + int try_futile = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) @@ -962,7 +962,7 @@ __acquires(&gl->gl_spin) if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { if (test_bit(GLF_LOCK, &gl->gl_flags)) - try_lock = 1; + try_futile = !may_grant(gl, gh); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) goto fail; } @@ -971,9 +971,8 @@ __acquires(&gl->gl_spin) if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) goto trap_recursive; - if (try_lock && - !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && - !may_grant(gl, gh)) { + if (try_futile && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { fail: gh->gh_error = GLR_TRYFAILED; gfs2_holder_wake(gh); -- cgit v1.2.3 From 29c05b205d4d30248982d5167bb142146db89bd3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:47 -0500 Subject: GFS2: Eliminate unnecessary check for state > 3 in bitfit Function gfs2_bitfit was checking for state > 3, but that's impossible since it is only called from rgblk_search, which receives only GFS2_BLKST_ constants. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c26711882a6b..47d2346575ad 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -228,8 +228,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, u64 mask = 0x5555555555555555ULL; u32 bit; - BUG_ON(state > 3); - /* Mask off bits we don't care about at the start of the search */ mask <<= spoint; tmp = gfs2_bit_search(ptr, mask, state); -- cgit v1.2.3 From c04a2ef3a8b51cf58d8ff18bb4d6af17252f0fb6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Aug 2012 11:14:57 +0100 Subject: GFS2: Use rbm for gfs2_testbit() Change the arguments to gfs2_testbit() so that it now just takes an rbm specifying the position of the two bit entry to return. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 72 +++++++++++++++++++++++----------------------------------- 1 file changed, 28 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 47d2346575ad..3a288cec5af0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -117,30 +117,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, /** * gfs2_testbit - test a bit in the bitmaps - * @rgd: the resource group descriptor - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to read + * @rbm: The bit to test * + * Returns: The two bit block state of the requested bit */ -static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, - const unsigned char *buffer, - unsigned int buflen, u32 block) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const unsigned char *byte, *end; - unsigned char cur_state; + const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + const u8 *byte; unsigned int bit; - byte = buffer + (block / GFS2_NBBY); - bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; - - gfs2_assert(rgd->rd_sbd, byte < end); - - cur_state = (*byte >> bit) & GFS2_BIT_MASK; + byte = buffer + (rbm->offset / GFS2_NBBY); + bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - return cur_state; + return (*byte >> bit) & GFS2_BIT_MASK; } /** @@ -1837,8 +1828,7 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) ret = gfs2_rbm_from_block(&rbm, block); WARN_ON_ONCE(ret != 0); - return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, - rbm.bi->bi_len, rbm.offset); + return gfs2_testbit(&rbm); } @@ -1846,42 +1836,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode - * @n: The extent length + * @n: The extent length (value/result) * - * Add the found bitmap buffer to the transaction. + * Add the bitmap buffer to the transaction. * Set the found bits to @new_state to change block's allocation state. - * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, +static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, unsigned int *n) { - struct gfs2_rgrpd *rgd = rbm->rgd; - struct gfs2_bitmap *bi = rbm->bi; - u32 blk = rbm->offset; + struct gfs2_rbm pos = { .rgd = rbm->rgd, }; const unsigned int elen = *n; - u32 goal; - const u8 *buffer = NULL; + u64 block; + int ret; - *n = 0; - buffer = bi->bi_bh->b_data + bi->bi_offset; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_clone, bi, blk, + *n = 1; + block = gfs2_rbm_to_block(rbm); + gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); + gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - (*n)++; - goal = blk; + block++; while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) - break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) + ret = gfs2_rbm_from_block(&pos, block); + WARN_ON(ret); + if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); + gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); + gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED); (*n)++; + block++; } - blk = gfs2_bi2rgd_blk(bi, blk); - rgd->rd_last_alloc = blk + *n - 1; - return rgd->rd_data0 + blk; } /** @@ -2042,7 +2025,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, goto rgrp_error; } - block = gfs2_alloc_extent(&rbm, dinode, nblocks); + gfs2_alloc_extent(&rbm, dinode, nblocks); + block = gfs2_rbm_to_block(&rbm); if (gfs2_rs_active(ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; -- cgit v1.2.3 From 3e6339dd2850353781513dec42a8d56690e9e1db Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Aug 2012 11:37:51 +0100 Subject: GFS2: Use rbm for gfs2_setbit() Use the rbm structure for gfs2_setbit() in order to simplify the arguments to the function. We have to add a bool to control whether the clone bitmap should be updated (if it exists) but otherwise it is a more or less direct substitution. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3a288cec5af0..55a2651666c9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -69,47 +69,42 @@ static const char valid_change[16] = { /** * gfs2_setbit - Set a bit in the bitmaps - * @rgd: the resource group descriptor - * @buf2: the clone buffer that holds the bitmaps - * @bi: the bitmap structure - * @block: the block to set + * @rbm: The position of the bit to set + * @do_clone: Also set the clone bitmap, if it exists * @new_state: the new state of the block * */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, - struct gfs2_bitmap *bi, u32 block, +static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = bi->bi_len; - const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + unsigned int buflen = rbm->bi->bi_len; + const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); - end = bi->bi_bh->b_data + bi->bi_offset + buflen; + byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " - "new_state=%d\n", - (unsigned long long)block, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", - (unsigned long long)rgd->rd_addr, - (unsigned long)bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", - (unsigned long)bi->bi_offset, - (unsigned long)bi->bi_len); + printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " + "new_state=%d\n", rbm->offset, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, + rbm->bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", + rbm->bi->bi_offset, rbm->bi->bi_len); dump_stack(); - gfs2_consist_rgrpd(rgd); + gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (buf2) { - byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); + if (do_clone && rbm->bi->bi_clone) { + byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -1852,8 +1847,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, *n = 1; block = gfs2_rbm_to_block(rbm); gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); - gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); @@ -1861,7 +1855,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); - gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED); + gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; block++; } @@ -1900,7 +1894,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, rbm.bi->bi_len); } gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); - gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state); + gfs2_setbit(&rbm, false, new_state); } return rbm.rgd; -- cgit v1.2.3 From 2b9731e8bb1bdc6b9da149f4e482a071747207f3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 20 Aug 2012 17:07:49 +0100 Subject: GFS2: Fix ->show_options() for statfs slow The ->show_options() function for GFS2 was not correctly displaying the value when statfs slow in in use. Signed-off-by: Steven Whitehouse Reported-by: Milos Jakubicek --- fs/gfs2/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3cbac6803038..79cac7057691 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) val = sdp->sd_tune.gt_statfs_quantum; if (val != 30) seq_printf(s, ",statfs_quantum=%d", val); + else if (sdp->sd_tune.gt_statfs_slow) + seq_puts(s, ",statfs_quantum=0"); val = sdp->sd_tune.gt_quota_quantum; if (val != 60) seq_printf(s, ",quota_quantum=%d", val); -- cgit v1.2.3 From 137834a696fd51ef8c710a0ad854b585027c7df0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Aug 2012 13:43:40 +0100 Subject: GFS2: Fall back to ignoring reservations, if there are no other blocks left When we get to the stage of allocating blocks, we know that the resource group in question must contain enough free blocks, otherwise gfs2_inplace_reserve() would have failed. So if we are left with only free blocks which are reserved, then we must use those. This can happen if another node has sneeked in and use some blocks reserved on this node, for example. Generally this will happen very rarely and only when the resouce group is nearly full. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 55a2651666c9..30c864e70298 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2012,6 +2012,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); + if (error == -ENOSPC) { + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false); + } + /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, -- cgit v1.2.3 From 9e733d3923fb0e4eeae7b827019332d246576a22 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Aug 2012 15:37:59 +0100 Subject: GFS2: Improve block reservation tracing This patch improves the tracing of block reservations by removing some corner cases and also providing more useful detail in the traces. A new field is added to the reservation structure to contain the inode number. This is used since in certain contexts it is not possible to access the inode itself to obtain this information. As a result we can then display the inode number for all tracepoints and also in case we dump the resource group. The "del" tracepoint operation has been removed. This could be called with the reservation rgrp set to NULL. That resulted in not printing the device number, and thus making the information largely useless anyway. Also, the conditional on the rgrp being NULL can then be removed from the tracepoint. After this change, all the block reservation tracepoint calls will be called with the rgrp information. The existing ins,clm and tdel calls to the block reservation tracepoint are sufficient to track the entire life of the block reservation. In gfs2_block_alloc() the error detection is updated to print out the inode number of the problematic inode. This can then be compared against the information in the glock dump,tracepoints, etc. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 6 ++---- fs/gfs2/rgrp.c | 22 ++++++++++------------ fs/gfs2/trace_gfs2.h | 18 ++++++++---------- 3 files changed, 20 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 99d7c64b5091..6aaa07c7c731 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -268,13 +268,11 @@ struct gfs2_blkreserv { /* components used during write (step 1): */ atomic_t rs_sizehint; /* hint of the write size */ - /* components used during get_local_rgrp (step 3): */ - struct gfs2_rbm rs_rbm; struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ struct rb_node rs_node; /* link to other block reservations */ - - /* components used during block searches and assignments (step 4): */ + struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ + u64 rs_inum; /* Inode number for reservation */ /* ancillary quota stuff */ struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 30c864e70298..87ee0b70f818 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -448,10 +448,11 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) return error; } -static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { - gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + (unsigned long long)rs->rs_inum, + (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -468,7 +469,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) return; rgd = rs->rs_rbm.rgd; - trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); + trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); @@ -511,7 +512,6 @@ void gfs2_rs_delete(struct gfs2_inode *ip) down_write(&ip->i_rw_mutex); if (ip->i_res) { gfs2_rs_deltree(ip, ip->i_res); - trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; @@ -1253,6 +1253,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rs->rs_free = amount; rs->rs_rbm.offset = biblk; rs->rs_rbm.bi = bi; + rs->rs_inum = ip->i_no_addr; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); @@ -1260,7 +1261,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rgd->rd_reserved += amount; /* blocks reserved */ rgd->rd_rs_cnt++; /* number of in-tree reservations */ spin_unlock(&rgd->rd_rsspin); - trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); + trace_gfs2_rs(rs, TRACE_RS_INSERT); return rs; } @@ -1966,7 +1967,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, rlen = min(rs->rs_free, len); rs->rs_free -= rlen; rgd->rd_reserved -= rlen; - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_free && !ret) goto out; } @@ -2005,10 +2006,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, else goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - if ((goal < rbm.rgd->rd_data0) || - (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data)) - rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1); - gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); @@ -2019,7 +2016,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { - fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, + fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", + (unsigned long long)ip->i_no_addr, error, *nblocks, test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index b947aa4dfca4..bbdc78af60ca 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc, /* Keep track of multi-block reservations as they are allocated/freed */ TRACE_EVENT(gfs2_rs, - TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, - u8 func), + TP_PROTO(const struct gfs2_blkreserv *rs, u8 func), - TP_ARGS(ip, rs, func), + TP_ARGS(rs, func), TP_STRUCT__entry( __field( dev_t, dev ) @@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev : 0; - __entry->rd_addr = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_addr : 0; - __entry->rd_free_clone = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_free_clone : 0; - __entry->rd_reserved = rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_reserved : 0; - __entry->inum = ip ? ip->i_no_addr : 0; + __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev; + __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; + __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; + __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; + __entry->inum = rs->rs_inum; __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); __entry->free = rs->rs_free; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " - "f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, -- cgit v1.2.3 From c743ffd09fa7d3464c6f74767a3ae2ca5dc3ebf7 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Sat, 25 Aug 2012 18:21:47 +0100 Subject: GFS2: Fix unclaimed_blocks() wrapping bug and clean up When rgd->rd_free_clone is less than rgd->rd_reserved, the unclaimed_blocks() calculation would wrap and produce incorrect results. This patch checks for this condition when this function is called from gfs2_mblk_search() In addition, the use of this particular function in other places in the code has been dropped by means of a general clean up of gfs2_inplace_reserve(). This function is now much easier to follow. Also the setting of the rgd->rd_last_alloc field is corrected. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 193 ++++++++++++++++++++++++++------------------------------- 1 file changed, 88 insertions(+), 105 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 87ee0b70f818..886954126704 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1231,7 +1231,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, BUG_ON(!ip->i_res); BUG_ON(gfs2_rs_active(rs)); /* Figure out where to put new node */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); @@ -1276,17 +1276,16 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) /** * rg_mblk_search - find a group of multiple free blocks * @rgd: the resource group descriptor - * @rs: the block reservation * @ip: pointer to the inode for which we're reserving blocks + * @requested: number of blocks required for this allocation * * This is very similar to rgblk_search, except we're looking for whole * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing * on aligned dwords for speed's sake. * - * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) { struct gfs2_bitmap *bi = rgd->rd_bits; const u32 length = rgd->rd_length; @@ -1299,11 +1298,16 @@ static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigne u32 best_rs_bytes, unclaimed; int best_rs_blocks; + if ((rgd->rd_free_clone < rgd->rd_reserved) || + (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS))) + return; + /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) goal = ip->i_goal - rgd->rd_data0; else goal = rgd->rd_last_alloc; + for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; /* Convert scope of "goal" from rgrp-wide to within @@ -1366,10 +1370,8 @@ do_search: BUG_ON(blk >= bi->bi_len * GFS2_NBBY); rs = rs_insert(bi, ip, blk, rsv_bytes * GFS2_NBBY); - if (IS_ERR(rs)) - return PTR_ERR(rs); if (rs) - return 0; + return; } ptr += ALIGN(search_bytes, sizeof(u64)); } @@ -1380,35 +1382,6 @@ skip: buf %= length; goal = 0; } - - return BFITNOENT; -} - -/** - * try_rgrp_fit - See if a given reservation will fit in a given RG - * @rgd: the RG data - * @ip: the inode - * - * If there's room for the requested blocks to be allocated from the RG: - * This will try to get a multi-block reservation first, and if that doesn't - * fit, it will take what it can. - * - * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) - */ - -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, - unsigned requested) -{ - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) - return 0; - /* Look for a multi-block reservation. */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip, requested) != BFITNOENT) - return 1; - if (unclaimed_blocks(rgd) >= requested) - return 1; - - return 0; } /** @@ -1678,6 +1651,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } +static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) +{ + struct gfs2_rgrpd *rgd = *pos; + + rgd = gfs2_rgrpd_get_next(rgd); + if (rgd == NULL) + rgd = gfs2_rgrpd_get_next(NULL); + *pos = rgd; + if (rgd != begin) /* If we didn't wrap */ + return true; + return false; +} + /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1697,10 +1683,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } + if (gfs2_assert_warn(sdp, requested)) + return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ @@ -1713,84 +1697,82 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) return -EBADSLT; while (loops < 3) { - rg_locked = 0; - - if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { - rg_locked = 1; - error = 0; - } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { - /* If the rgrp already is maxed out for contenders, - we can eliminate it as a "first pass" without even - requesting the rgrp glock. */ - error = GLR_TRYFAILED; - } else { + rg_locked = 1; + + if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { + rg_locked = 0; error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); - if (!error && sdp->sd_args.ar_rgrplvb) { + if (error == GLR_TRYFAILED) + goto next_rgrp; + if (unlikely(error)) + return error; + if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rbm.rgd); - if (error) { + if (unlikely(error)) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; } } } - switch (error) { - case 0: - if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rbm.rgd) + - rs->rs_free >= requested) { - ip->i_rgd = rs->rs_rbm.rgd; - return 0; - } - /* We have a multi-block reservation, but the - rgrp doesn't have enough free blocks to - satisfy the request. Free the reservation - and look for a suitable rgrp. */ - gfs2_rs_deltree(ip, rs); - } - if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); - ip->i_rgd = rs->rs_rbm.rgd; - return 0; - } - if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); - try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, - ip->i_no_addr); - } - if (!rg_locked) - gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - /* fall through */ - case GLR_TRYFAILED: - rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd); - rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */ - break; - flags &= ~LM_FLAG_TRY; - loops++; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && - !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - goto out; - } else if (loops == 2) - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - break; - default: - goto out; + /* Skip unuseable resource groups */ + if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + goto skip_rgrp; + + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + + /* Get a reservation if we don't already have one */ + if (!gfs2_rs_active(rs)) + rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + + /* Skip rgrps when we can't get a reservation on first pass */ + if (!gfs2_rs_active(rs) && (loops < 1)) + goto check_rgrp; + + /* If rgrp has enough free space, use it */ + if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + ip->i_rgd = rs->rs_rbm.rgd; + return 0; } + + /* Drop reservation, if we couldn't use reserved rgrp */ + if (gfs2_rs_active(rs)) + gfs2_rs_deltree(ip, rs); +check_rgrp: + /* Check for unlinked inodes which can be reclaimed */ + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + ip->i_no_addr); +skip_rgrp: + /* Unlock rgrp if required */ + if (!rg_locked) + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); +next_rgrp: + /* Find the next rgrp, and continue looking */ + if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + continue; + + /* If we've scanned all the rgrps, but found no free blocks + * then this checks for some less likely conditions before + * trying again. + */ + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + return error; + } + /* Flushing the log may release space */ + if (loops == 2) + gfs2_log_flush(sdp, NULL); } - error = -ENOSPC; -out: - return error; + return -ENOSPC; } /** @@ -2024,6 +2006,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); + rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; if (gfs2_rs_active(ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; -- cgit v1.2.3 From 0688a5ecea61a36ba12d17a18ab9f8712145cfa2 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 28 Aug 2012 08:45:56 -0400 Subject: GFS2: Stop block extents at the end of bitmaps This patch stops multiple block allocations if a nonzero return code is received from gfs2_rbm_from_block. Without this patch, if enough pressure is put on the file system, you get a kernel warning quickly followed by: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] gfs2_alloc_blocks+0x2c8/0x880 [gfs2] With this patch, things run normally. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 886954126704..defb8265ce52 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1834,8 +1834,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); - WARN_ON(ret); - if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) + if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); gfs2_setbit(&pos, true, GFS2_BLKST_USED); -- cgit v1.2.3 From 56aa72d0fcc9c4a3af4d0111d8d7f336b63adff9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 5 Sep 2012 16:55:11 -0400 Subject: GFS2: Get rid of I_MUTEX_QUOTA usage GFS2 uses i_mutex on its system quota inode to synchronize writes to quota file. Since this is an internal inode to GFS2 (not part of directory hiearchy or visible by user) we are safe to define locking rules for it. So let's just get it its own locking class to make it clear. Signed-off-by: Jan Kara Signed-off-by: J. Bruce Fields Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 8 ++++++++ fs/gfs2/quota.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5af9dc420ef..e443966c8106 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -766,6 +767,7 @@ fail: return error; } +static struct lock_class_key gfs2_quota_imutex_key; static int init_inodes(struct gfs2_sbd *sdp, int undo) { @@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) fs_err(sdp, "can't get quota file inode: %d\n", error); goto fail_rindex; } + /* + * i_mutex on quota files is special. Since this inode is hidden system + * file, we are safe to define locking ourselves. + */ + lockdep_set_class(&sdp->sd_quota_inode->i_mutex, + &gfs2_quota_imutex_key); error = gfs2_rindex_update(sdp); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 420bc3805ccc..4021deca61ef 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -782,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); - mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); + mutex_lock(&ip->i_inode.i_mutex); for (qx = 0; qx < num_qd; qx++) { error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); -- cgit v1.2.3 From ff7f4cb461163967a9dbb8c569e2447b7520654f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 10 Sep 2012 10:03:50 +0100 Subject: GFS2: Consolidate free block searching functions With the recently added block reservation code, an additional function was added to search for free blocks. This had a restriction of only being able to search for aligned extents of free blocks. As a result the allocation patterns when reserving blocks were suboptimal when the existing allocation of blocks for an inode was not aligned to the same boundary. This patch resolves that problem by adding the ability for gfs2_rbm_find to search for extents of a particular minimum size. We can then use gfs2_rbm_find for both looking for reservations, and also looking for free blocks on an individual basis when we actually come to do the allocation later on. As a result we only need a single set of code to deal with both situations. The function gfs2_rbm_from_block() is moved up rgrp.c so that it occurs before all of its callers. Many thanks are due to Bob for helping track down the final issue in this patch. That fix to the rb_tree traversal and to not share block reservations from a dirctory to its children is included here. Signed-off-by: Steven Whitehouse Signed-off-by: Bob Peterson --- fs/gfs2/incore.h | 1 - fs/gfs2/inode.c | 11 +- fs/gfs2/rgrp.c | 367 +++++++++++++++++++++++++++++-------------------------- fs/gfs2/rgrp.h | 6 - 4 files changed, 195 insertions(+), 190 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 6aaa07c7c731..3d469d37345e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -99,7 +99,6 @@ struct gfs2_rgrpd { #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct rb_root rd_rstree; /* multi-block reservation tree */ - u32 rd_rs_cnt; /* count of current reservations */ }; struct gfs2_rbm { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index f2709ea887da..381893ceefa4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; - /* The newly created inode needs a reservation so it can allocate - xattrs. At the same time, we want new blocks allocated to the new - dinode to be as contiguous as possible. Since we allocated the - dinode block under the directory's reservation, we transfer - ownership of that reservation to the new inode. The directory - doesn't need a reservation unless it needs a new allocation. */ - ip->i_res = dip->i_res; - dip->i_res = NULL; + error = gfs2_rs_alloc(ip); + if (error) + goto fail_gunlock2; error = gfs2_acl_create(dip, inode); if (error) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index defb8265ce52..b933cdcda7f4 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,9 +35,6 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -#define RSRV_CONTENTION_FACTOR 4 -#define RGRP_RSRV_MAX_CONTENDERS 2 - #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -67,6 +64,10 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap); + + /** * gfs2_setbit - Set a bit in the bitmaps * @rbm: The position of the bit to set @@ -234,6 +235,130 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; } +/** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + break; + } + } + + return 0; +} + +/** + * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ + +static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) +{ + u64 block; + u32 n; + u8 res; + + for (n = 0; n < n_unaligned; n++) { + res = gfs2_testbit(rbm); + if (res != GFS2_BLKST_FREE) + return true; + (*len)--; + if (*len == 0) + return true; + block = gfs2_rbm_to_block(rbm); + if (gfs2_rbm_from_block(rbm, block + 1)) + return true; + } + + return false; +} + +/** + * gfs2_free_extlen - Return extent length of free blocks + * @rbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using memchr_inv when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ + +static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) +{ + struct gfs2_rbm rbm = *rrbm; + u32 n_unaligned = rbm.offset & 3; + u32 size = len; + u32 bytes; + u32 chunk_size; + u8 *ptr, *start, *end; + u64 block; + + if (n_unaligned && + gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) + goto out; + + /* Start is now byte aligned */ + while (len > 3) { + start = rbm.bi->bi_bh->b_data; + if (rbm.bi->bi_clone) + start = rbm.bi->bi_clone; + end = start + rbm.bi->bi_bh->b_size; + start += rbm.bi->bi_offset; + BUG_ON(rbm.offset & 3); + start += (rbm.offset / GFS2_NBBY); + bytes = min_t(u32, len / GFS2_NBBY, (end - start)); + ptr = memchr_inv(start, 0, bytes); + chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); + chunk_size *= GFS2_NBBY; + BUG_ON(len < chunk_size); + len -= chunk_size; + block = gfs2_rbm_to_block(&rbm); + gfs2_rbm_from_block(&rbm, block + chunk_size); + n_unaligned = 3; + if (ptr) + break; + n_unaligned = len & 3; + } + + /* Deal with any bits left over at the end */ + if (n_unaligned) + gfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: + return size - len; +} + /** * gfs2_bitcount - count the number of bits in a certain state * @rgd: the resource group descriptor @@ -472,8 +597,6 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); - BUG_ON(!rgd->rd_rs_cnt); - rgd->rd_rs_cnt--; if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ @@ -1208,179 +1331,85 @@ out: /** * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree - * @bi: the bitmap with the blocks * @ip: the inode structure - * @biblk: the 32-bit block number relative to the start of the bitmap - * @amount: the number of blocks to reserve * - * Returns: NULL - reservation was already taken, so not inserted - * pointer to the inserted reservation */ -static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, - struct gfs2_inode *ip, u32 biblk, - int amount) +static void rs_insert(struct gfs2_inode *ip) { struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - BUG_ON(!ip->i_res); BUG_ON(gfs2_rs_active(rs)); - /* Figure out where to put new node */ + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, amount, cur); + rc = rs_cmp(fsblock, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) newn = &((*newn)->rb_left); else { spin_unlock(&rgd->rd_rsspin); - return NULL; /* reservation already in use */ + WARN_ON(1); + return; } } - /* Do our reservation work */ - rs = ip->i_res; - rs->rs_free = amount; - rs->rs_rbm.offset = biblk; - rs->rs_rbm.bi = bi; - rs->rs_inum = ip->i_no_addr; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += amount; /* blocks reserved */ - rgd->rd_rs_cnt++; /* number of in-tree reservations */ + rgd->rd_reserved += rs->rs_free; /* blocks reserved */ spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); - return rs; } /** - * unclaimed_blocks - return number of blocks that aren't spoken for - */ -static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) -{ - return rgd->rd_free_clone - rgd->rd_reserved; -} - -/** - * rg_mblk_search - find a group of multiple free blocks + * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks * @requested: number of blocks required for this allocation * - * This is very similar to rgblk_search, except we're looking for whole - * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing - * on aligned dwords for speed's sake. - * */ -static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_bitmap *bi = rgd->rd_bits; - const u32 length = rgd->rd_length; - u32 blk; - unsigned int buf, x, search_bytes; - u8 *buffer = NULL; - u8 *ptr, *end, *nonzero; - u32 goal, rsv_bytes; - struct gfs2_blkreserv *rs; - u32 best_rs_bytes, unclaimed; - int best_rs_blocks; + struct gfs2_rbm rbm = { .rgd = rgd, }; + u64 goal; + struct gfs2_blkreserv *rs = ip->i_res; + u32 extlen; + u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; + int ret; - if ((rgd->rd_free_clone < rgd->rd_reserved) || - (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS))) + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); + extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); + if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) return; /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + goal = ip->i_goal; else - goal = rgd->rd_last_alloc; + goal = rgd->rd_last_alloc + rgd->rd_data0; - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within - found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; - -do_search: - best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), - (RGRP_RSRV_MINBLKS * rgd->rd_length)); - best_rs_bytes = (best_rs_blocks * - (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / - GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ - best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); - unclaimed = unclaimed_blocks(rgd); - if (best_rs_bytes * GFS2_NBBY > unclaimed) - best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; - - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags)) - goto skip; + if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) + return; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - else - buffer = bi->bi_bh->b_data + bi->bi_offset; - - /* We have to keep the reservations aligned on u64 boundaries - otherwise we could get situations where a byte can't be - used because it's after a reservation, but a free bit still - is within the reservation's area. */ - ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); - end = (buffer + bi->bi_len); - while (ptr < end) { - rsv_bytes = 0; - if ((ptr + best_rs_bytes) <= end) - search_bytes = best_rs_bytes; - else - search_bytes = end - ptr; - BUG_ON(!search_bytes); - nonzero = memchr_inv(ptr, 0, search_bytes); - /* If the lot is all zeroes, reserve the whole size. If - there's enough zeroes to satisfy the request, use - what we can. If there's not enough, keep looking. */ - if (nonzero == NULL) - rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= requested) - rsv_bytes = (nonzero - ptr); - - if (rsv_bytes) { - blk = ((ptr - buffer) * GFS2_NBBY); - BUG_ON(blk >= bi->bi_len * GFS2_NBBY); - rs = rs_insert(bi, ip, blk, - rsv_bytes * GFS2_NBBY); - if (rs) - return; - } - ptr += ALIGN(search_bytes, sizeof(u64)); - } -skip: - /* Try next bitmap block (wrap back to rgrp header - if at end) */ - buf++; - buf %= length; - goal = 0; + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); + if (ret == 0) { + rs->rs_rbm = rbm; + rs->rs_free = extlen; + rs->rs_inum = ip->i_no_addr; + rs_insert(ip); } } @@ -1388,6 +1417,7 @@ skip: * gfs2_next_unreserved_block - Return next block that is not reserved * @rgd: The resource group * @block: The starting block + * @length: The required length * @ip: Ignore any reservations for this inode * * If the block does not appear in any reservation, then return the @@ -1397,6 +1427,7 @@ skip: */ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + u32 length, const struct gfs2_inode *ip) { struct gfs2_blkreserv *rs; @@ -1404,10 +1435,10 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, int rc; spin_lock(&rgd->rd_rsspin); - n = rb_first(&rgd->rd_rstree); + n = rgd->rd_rstree.rb_node; while (n) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(block, 1, rs); + rc = rs_cmp(block, length, rs); if (rc < 0) n = n->rb_left; else if (rc > 0) @@ -1417,9 +1448,9 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) { + while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; - n = rb_next(&rs->rs_node); + n = n->rb_right; if (n == NULL) break; rs = rb_entry(n, struct gfs2_blkreserv, rs_node); @@ -1430,44 +1461,11 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, return block; } -/** - * gfs2_rbm_from_block - Set the rbm based upon rgd and block number - * @rbm: The rbm with rgd already set correctly - * @block: The block number (filesystem relative) - * - * This sets the bi and offset members of an rbm based on a - * resource group and a filesystem relative block number. The - * resource group must be set in the rbm on entry, the bi and - * offset members will be set by this function. - * - * Returns: 0 on success, or an error code - */ - -static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) -{ - u64 rblock = block - rbm->rgd->rd_data0; - u32 goal = (u32)rblock; - int x; - - if (WARN_ON_ONCE(rblock > UINT_MAX)) - return -EINVAL; - if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) - return -E2BIG; - - for (x = 0; x < rbm->rgd->rd_length; x++) { - rbm->bi = rbm->rgd->rd_bits + x; - if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { - rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - break; - } - } - - return 0; -} - /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group + * @ip: The inode for which we are searching for blocks + * @minext: The minimum extent length * * This checks the current position in the rgrp to see whether there is * a reservation covering this block. If not then this function is a @@ -1479,15 +1477,33 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip) + const struct gfs2_inode *ip, + u32 minext) { u64 block = gfs2_rbm_to_block(rbm); + u32 extlen = 1; u64 nblock; int ret; - nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip); + /* + * If we have a minimum extent length, then skip over any extent + * which is less than the min extent length in size. + */ + if (minext) { + extlen = gfs2_free_extlen(rbm, minext); + nblock = block + extlen; + if (extlen < minext) + goto fail; + } + + /* + * Check the extent which has been found against the reservations + * and skip if parts of it are already reserved + */ + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); if (nblock == block) return 0; +fail: ret = gfs2_rbm_from_block(rbm, nblock); if (ret < 0) return ret; @@ -1498,6 +1514,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find + * @minext: The requested extent length (0 for a single block) * @ip: If set, check for reservations * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. @@ -1509,7 +1526,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * Returns: 0 on success, -ENOSPC if there is no block of the requested state */ -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, const struct gfs2_inode *ip, bool nowrap) { struct buffer_head *bh; @@ -1548,7 +1565,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, return 0; initial_bi = rbm->bi; - ret = gfs2_reservation_check_and_update(rbm, ip); + ret = gfs2_reservation_check_and_update(rbm, ip, minext); if (ret == 0) return 0; if (ret > 0) { @@ -1608,7 +1625,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip while (1) { down_write(&sdp->sd_log_flush_lock); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; @@ -1988,11 +2005,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); if (error == -ENOSPC) { gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); } /* Since all blocks are reserved in advance, this shouldn't happen */ diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index c98f6af07e1c..24077958dcf6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -79,10 +79,4 @@ static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) return rs && !RB_EMPTY_NODE(&rs->rs_node); } - -static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; -} - #endif /* __RGRP_DOT_H__ */ -- cgit v1.2.3 From 3701530aed9711fcfe9b29d3de0f7a27c8dbc8ae Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 12 Sep 2012 09:40:31 -0400 Subject: GFS2: Fix infinite loop in rbm_find This patch fixes an infinite loop in gfs2_rbm_find that was introduced by the previous patch. The problem occurred when the length was less than 3 but the rbm block was byte-aligned, causing it to improperly return a extent length of zero, which caused it to spin. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse Tested-by: Bob Peterson Tested-by: Barry Marson --- fs/gfs2/rgrp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b933cdcda7f4..3cc402ce6fea 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -329,6 +329,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) goto out; + n_unaligned = len & 3; /* Start is now byte aligned */ while (len > 3) { start = rbm.bi->bi_bh->b_data; -- cgit v1.2.3 From a0b4df294387c777c3091a43e9970572e10261b1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 17 Sep 2012 21:50:31 -0500 Subject: GFS2: fix s_writers.counter imbalance in gfs2_ail_empty_gl gfs2_ail_empty_gl() contains an "inline version" of gfs2_trans_begin(), so it needs an explicit sb_start_intwrite() as well, to balance the sb_end_intwrite() which will be called by gfs2_trans_end(). With this, xfstest 068 passes on lock_nolock local gfs2. Without it, we reach a writer count of -1 and get stuck. Signed-off-by: Eric Sandeen Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4bdcf3784187..32cc4fde975c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) /* A shortened, inline version of gfs2_trans_begin() */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); + sb_start_intwrite(sdp->sd_vfs); gfs2_log_reserve(sdp, tr.tr_reserved); BUG_ON(current->journal_info); current->journal_info = &tr; -- cgit v1.2.3 From 2216db70c95a9610f5279b8da53bec614d98270f Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 20 Sep 2012 09:52:58 -0500 Subject: GFS2: Write out dirty inode metadata in delayed deletes If a dirty GFS2 inode was being deleted but was in use by another node, its metadata was not getting written out before GFS2 checked for dirty buffers in gfs2_ail_flush(). GFS2 was relying on inode_go_sync() to write out the metadata when the other node tried to free the file, but it failed the error check before it got that far. This patch writes out the metadata before calling gfs2_ail_flush() Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/super.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 79cac7057691..a8d90f2f576c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1545,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode) out_truncate: gfs2_log_flush(sdp, ip->i_gl); + if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { + struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); + filemap_fdatawrite(metamapping); + filemap_fdatawait(metamapping); + } write_inode_now(inode, 1); gfs2_ail_flush(ip->i_gl, 0); -- cgit v1.2.3 From ed6875e0d6c28e4a6b44da04d6d4363b3d92d630 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:25 -0700 Subject: CIFS: Move unlink code to ops struct Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 6 ++++++ fs/cifs/cifsproto.h | 9 ++++++--- fs/cifs/cifssmb.c | 16 ++++++++-------- fs/cifs/inode.c | 33 ++++++++++++++++++++++----------- fs/cifs/smb1ops.c | 2 ++ 5 files changed, 44 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 977dc0e85ccb..843356fa262d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -256,6 +256,12 @@ struct smb_version_operations { /* remove directory */ int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *); + /* unlink file */ + int (*unlink)(const unsigned int, struct cifs_tcon *, const char *, + struct cifs_sb_info *); + /* open, rename and delete file */ + int (*rename_pending_delete)(const char *, struct dentry *, + const unsigned int); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f1bbf8305d3a..79c088469757 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -144,6 +144,11 @@ extern int cifs_get_file_info_unix(struct file *filp); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, struct super_block *sb, unsigned int xid); +extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs, + unsigned int xid, char *full_path, __u32 dosattr); +extern int cifs_rename_pending_delete(const char *full_path, + struct dentry *dentry, + const unsigned int xid); extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, const char *path, const __u16 *pfid); @@ -303,9 +308,7 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, - const char *name, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f0cf934ba877..2dddf01d2b66 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -902,15 +902,15 @@ PsxDelete: } int -CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, - const char *fileName, const struct nls_table *nls_codepage, - int remap) +CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) { DELETE_FILE_REQ *pSMB = NULL; DELETE_FILE_RSP *pSMBr = NULL; int rc = 0; int bytes_returned; int name_len; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; DelFileRetry: rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, @@ -919,15 +919,15 @@ DelFileRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = - cifsConvertToUTF16((__le16 *) pSMB->fileName, fileName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->fileName, name, + PATH_MAX, cifs_sb->local_nls, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); + name_len = strnlen(name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->fileName, fileName, name_len); + strncpy(pSMB->fileName, name, name_len); } pSMB->SearchAttributes = cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index cb79c7edecb0..bb39ea475a20 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -876,9 +876,9 @@ out: return inode; } -static int +int cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, - char *full_path, __u32 dosattr) + char *full_path, __u32 dosattr) { int rc; int oplock = 0; @@ -993,13 +993,13 @@ out: } /* - * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit + * Open the given file (if it isn't already), set the DELETE_ON_CLOSE bit * and rename it to a random name that hopefully won't conflict with * anything else. */ -static int -cifs_rename_pending_delete(char *full_path, struct dentry *dentry, - unsigned int xid) +int +cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, + const unsigned int xid) { int oplock = 0; int rc; @@ -1136,6 +1136,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct tcon_link *tlink; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct iattr *attrs = NULL; __u32 dosattr = 0, origattr = 0; @@ -1145,6 +1146,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(tlink)) return PTR_ERR(tlink); tcon = tlink_tcon(tlink); + server = tcon->ses->server; xid = get_xid(); @@ -1167,8 +1169,12 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) } retry_std_delete: - rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (!server->ops->unlink) { + rc = -ENOSYS; + goto psx_del_no_retry; + } + + rc = server->ops->unlink(xid, tcon, full_path, cifs_sb); psx_del_no_retry: if (!rc) { @@ -1177,9 +1183,14 @@ psx_del_no_retry: } else if (rc == -ENOENT) { d_drop(dentry); } else if (rc == -ETXTBSY) { - rc = cifs_rename_pending_delete(full_path, dentry, xid); - if (rc == 0) - cifs_drop_nlink(inode); + if (server->ops->rename_pending_delete) { + rc = server->ops->rename_pending_delete(full_path, + dentry, xid); + if (rc == 0) + cifs_drop_nlink(inode); + } + if (rc == -ETXTBSY) + rc = -EBUSY; } else if ((rc == -EACCES) && (dosattr == 0) && inode) { attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (attrs == NULL) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3129ac74b819..725fa6195867 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -644,6 +644,8 @@ struct smb_version_operations smb1_operations = { .mkdir = CIFSSMBMkDir, .mkdir_setinfo = cifs_mkdir_setinfo, .rmdir = CIFSSMBRmDir, + .unlink = CIFSSMBDelFile, + .rename_pending_delete = cifs_rename_pending_delete, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From cbe6f439f5762c7fb4d2dd9293f5fdbfc4cd68f8 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:25 -0700 Subject: CIFS: Add SMB2 support for unlink Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 9 +++++++++ fs/cifs/smb2ops.c | 1 + fs/cifs/smb2proto.h | 2 ++ 3 files changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 2aa5cb08c526..02a9bda4248c 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -161,3 +161,12 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, NULL, SMB2_OP_DELETE); } + +int +smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, + 0, CREATE_DELETE_ON_CLOSE, NULL, + SMB2_OP_DELETE); +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 826209bf3684..bf9b318cfa4b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -321,6 +321,7 @@ struct smb_version_operations smb21_operations = { .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, .rmdir = smb2_rmdir, + .unlink = smb2_unlink, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index bfaa7b148afd..f4ac72799f70 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -59,6 +59,8 @@ extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, struct cifs_tcon *tcon, const unsigned int xid); extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); +extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *name, struct cifs_sb_info *cifs_sb); /* * SMB2 Worker functions - most of protocol specific implementation details -- cgit v1.2.3 From 4b4de76e35518fc0c636f628abca8c1b19ad6689 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:26 -0700 Subject: CIFS: Replace netfid with cifs_fid struct in cifsFileInfo This is help us to extend the code for future protocols that can use another fid mechanism (as SMB2 that has it divided into two parts: persistent and violatile). Also rename variables and refactor the code around the changes. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 2 +- fs/cifs/cifsglob.h | 6 ++- fs/cifs/cifssmb.c | 4 +- fs/cifs/file.c | 116 ++++++++++++++++++++++++++--------------------------- fs/cifs/inode.c | 10 ++--- fs/cifs/ioctl.c | 13 ++++-- fs/cifs/misc.c | 2 +- fs/cifs/readdir.c | 10 ++--- fs/cifs/smb1ops.c | 2 +- 9 files changed, 87 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 05f4dc263a23..2ee5c54797fa 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1222,7 +1222,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, if (!open_file) return get_cifs_acl_by_path(cifs_sb, path, pacllen); - pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); + pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->fid.netfid, pacllen); cifsFileInfo_put(open_file); return pntsd; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 843356fa262d..a2a3865dee1b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -746,6 +746,10 @@ struct cifs_search_info { bool smallBuf:1; /* so we know which buf_release function to call */ }; +struct cifs_fid { + __u16 netfid; +}; + struct cifsFileInfo { struct list_head tlist; /* pointer to next fid owned by tcon */ struct list_head flist; /* next fid (file instance) for this inode */ @@ -755,7 +759,7 @@ struct cifsFileInfo { */ unsigned int uid; /* allows finding which FileInfo structure */ __u32 pid; /* process id who opened file */ - __u16 netfid; /* file id from remote */ + struct cifs_fid fid; /* file id from remote */ /* BB add lock scope info here if needed */ ; /* lock scope id (0 if none) */ struct dentry *dentry; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2dddf01d2b66..4c48b9c60b26 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1632,7 +1632,7 @@ cifs_async_readv(struct cifs_readdata *rdata) smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ - smb->Fid = rdata->cfile->netfid; + smb->Fid = rdata->cfile->fid.netfid; smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); if (wct == 12) smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); @@ -2084,7 +2084,7 @@ cifs_async_writev(struct cifs_writedata *wdata) smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ - smb->Fid = wdata->cfile->netfid; + smb->Fid = wdata->cfile->fid.netfid; smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); if (wct == 14) smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 71e9ad9f5961..712f2a4d0d49 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -247,39 +247,39 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - struct cifsInodeInfo *pCifsInode = CIFS_I(inode); - struct cifsFileInfo *pCifsFile; - - pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (pCifsFile == NULL) - return pCifsFile; - - pCifsFile->count = 1; - pCifsFile->netfid = fileHandle; - pCifsFile->pid = current->tgid; - pCifsFile->uid = current_fsuid(); - pCifsFile->dentry = dget(dentry); - pCifsFile->f_flags = file->f_flags; - pCifsFile->invalidHandle = false; - pCifsFile->tlink = cifs_get_tlink(tlink); - mutex_init(&pCifsFile->fh_mutex); - INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); - INIT_LIST_HEAD(&pCifsFile->llist); + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifsFileInfo *cfile; + + cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); + if (cfile == NULL) + return cfile; + + cfile->count = 1; + cfile->fid.netfid = fileHandle; + cfile->pid = current->tgid; + cfile->uid = current_fsuid(); + cfile->dentry = dget(dentry); + cfile->f_flags = file->f_flags; + cfile->invalidHandle = false; + cfile->tlink = cifs_get_tlink(tlink); + mutex_init(&cfile->fh_mutex); + INIT_WORK(&cfile->oplock_break, cifs_oplock_break); + INIT_LIST_HEAD(&cfile->llist); spin_lock(&cifs_file_list_lock); - list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList)); + list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList)); /* if readable file instance put first in list*/ if (file->f_mode & FMODE_READ) - list_add(&pCifsFile->flist, &pCifsInode->openFileList); + list_add(&cfile->flist, &cinode->openFileList); else - list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); + list_add_tail(&cfile->flist, &cinode->openFileList); spin_unlock(&cifs_file_list_lock); - cifs_set_oplock_level(pCifsInode, oplock); - pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll; + cifs_set_oplock_level(cinode, oplock); + cinode->can_cache_brlcks = cinode->clientCanCacheAll; - file->private_data = pCifsFile; - return pCifsFile; + file->private_data = cfile; + return cfile; } static void cifs_del_lock_waiters(struct cifsLockInfo *lock); @@ -336,7 +336,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) unsigned int xid; int rc; xid = get_xid(); - rc = CIFSSMBClose(xid, tcon, cifs_file->netfid); + rc = CIFSSMBClose(xid, tcon, cifs_file->fid.netfid); free_xid(xid); } @@ -561,7 +561,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) } reopen_success: - pCifsFile->netfid = netfid; + pCifsFile->fid.netfid = netfid; pCifsFile->invalidHandle = false; mutex_unlock(&pCifsFile->fh_mutex); pCifsInode = CIFS_I(inode); @@ -609,39 +609,37 @@ int cifs_closedir(struct inode *inode, struct file *file) { int rc = 0; unsigned int xid; - struct cifsFileInfo *pCFileStruct = file->private_data; - char *ptmp; + struct cifsFileInfo *cfile = file->private_data; + char *tmp; cFYI(1, "Closedir inode = 0x%p", inode); xid = get_xid(); - if (pCFileStruct) { - struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink); + if (cfile) { + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); cFYI(1, "Freeing private data in close dir"); spin_lock(&cifs_file_list_lock); - if (!pCFileStruct->srch_inf.endOfSearch && - !pCFileStruct->invalidHandle) { - pCFileStruct->invalidHandle = true; + if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); - cFYI(1, "Closing uncompleted readdir with rc %d", - rc); + rc = CIFSFindClose(xid, tcon, cfile->fid.netfid); + cFYI(1, "Closing uncompleted readdir with rc %d", rc); /* not much we can do if it fails anyway, ignore rc */ rc = 0; } else spin_unlock(&cifs_file_list_lock); - ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; - if (ptmp) { + tmp = cfile->srch_inf.ntwrk_buf_start; + if (tmp) { cFYI(1, "closedir free smb buf in srch struct"); - pCFileStruct->srch_inf.ntwrk_buf_start = NULL; - if (pCFileStruct->srch_inf.smallBuf) - cifs_small_buf_release(ptmp); + cfile->srch_inf.ntwrk_buf_start = NULL; + if (cfile->srch_inf.smallBuf) + cifs_small_buf_release(tmp); else - cifs_buf_release(ptmp); + cifs_buf_release(tmp); } - cifs_put_tlink(pCFileStruct->tlink); + cifs_put_tlink(cfile->tlink); kfree(file->private_data); file->private_data = NULL; } @@ -932,7 +930,8 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) cur->OffsetLow = cpu_to_le32((u32)li->offset); cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); if (++num == max_num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, + cfile->fid.netfid, (__u8)li->type, 0, num, buf); if (stored_rc) @@ -944,7 +943,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) } if (num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, (__u8)types[i], 0, num, buf); if (stored_rc) rc = stored_rc; @@ -1038,7 +1037,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) type = CIFS_WRLCK; lck = list_entry(el, struct lock_to_push, llist); lck->pid = flock->fl_pid; - lck->netfid = cfile->netfid; + lck->netfid = cfile->fid.netfid; lck->length = length; lck->type = type; lck->offset = flock->fl_start; @@ -1137,7 +1136,7 @@ static int cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, __u64 length, __u32 type, int lock, int unlock, bool wait) { - return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid, + return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid, current->tgid, length, offset, unlock, lock, (__u8)type, wait, 0); } @@ -1151,7 +1150,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - __u16 netfid = cfile->netfid; + __u16 netfid = cfile->fid.netfid; if (posix_lck) { int posix_lock_type; @@ -1295,7 +1294,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, */ list_move(&li->llist, &tmp_llist); if (++num == max_num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, + cfile->fid.netfid, li->type, num, 0, buf); if (stored_rc) { /* @@ -1318,7 +1318,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, cur++; } if (num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, types[i], num, 0, buf); if (stored_rc) { cifs_move_llist(&tmp_llist, &cfile->llist); @@ -1343,7 +1343,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - __u16 netfid = cfile->netfid; + __u16 netfid = cfile->fid.netfid; if (posix_lck) { int posix_lock_type; @@ -1423,7 +1423,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) tcon->ses->server); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - netfid = cfile->netfid; + netfid = cfile->fid.netfid; cinode = CIFS_I(file->f_path.dentry->d_inode); if (cap_unix(tcon->ses) && @@ -1514,7 +1514,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, /* iov[0] is reserved for smb header */ iov[1].iov_base = (char *)write_data + total_written; iov[1].iov_len = len; - io_parms.netfid = open_file->netfid; + io_parms.netfid = open_file->fid.netfid; io_parms.pid = pid; io_parms.tcon = pTcon; io_parms.offset = *poffset; @@ -2078,7 +2078,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) - rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); + rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid); free_xid(xid); mutex_unlock(&inode->i_mutex); @@ -2106,7 +2106,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) - rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); + rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid); free_xid(xid); mutex_unlock(&inode->i_mutex); @@ -2802,7 +2802,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if (rc != 0) break; } - io_parms.netfid = open_file->netfid; + io_parms.netfid = open_file->fid.netfid; io_parms.pid = pid; io_parms.tcon = tcon; io_parms.offset = *poffset; @@ -3374,7 +3374,7 @@ void cifs_oplock_break(struct work_struct *work) * disconnected since oplock already released by the server */ if (!cfile->oplock_break_cancelled) { - rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, + rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->fid.netfid, current->tgid, 0, 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false, cinode->clientCanCacheRead ? 1 : 0); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index bb39ea475a20..ea7428a82a31 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -294,7 +294,7 @@ int cifs_get_file_info_unix(struct file *filp) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); xid = get_xid(); - rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); + rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data); if (!rc) { cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); } else if (rc == -EREMOTE) { @@ -562,7 +562,7 @@ int cifs_get_file_info(struct file *filp) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); xid = get_xid(); - rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); + rc = CIFSSMBQFileInfo(xid, tcon, cfile->fid.netfid, &find_data); switch (rc) { case 0: cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); @@ -930,7 +930,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, */ open_file = find_writable_file(cifsInode, true); if (open_file) { - netfid = open_file->netfid; + netfid = open_file->fid.netfid; netpid = open_file->pid; pTcon = tlink_tcon(open_file->tlink); goto set_via_filehandle; @@ -1887,7 +1887,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, */ open_file = find_writable_file(cifsInode, true); if (open_file) { - __u16 nfid = open_file->netfid; + __u16 nfid = open_file->fid.netfid; __u32 npid = open_file->pid; pTcon = tlink_tcon(open_file->tlink); rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, @@ -2061,7 +2061,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->device = 0; open_file = find_writable_file(cifsInode, true); if (open_file) { - u16 nfid = open_file->netfid; + u16 nfid = open_file->fid.netfid; u32 npid = open_file->pid; pTcon = tlink_tcon(open_file->tlink); rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index ae082a66de2f..5b3481bd3d96 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -75,8 +75,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) tcon = tlink_tcon(pSMBFile->tlink); caps = le64_to_cpu(tcon->fsUnixInfo.Capability); if (CIFS_UNIX_EXTATTR_CAP & caps) { - rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, - &ExtAttrBits, &ExtAttrMask); + rc = CIFSGetExtAttr(xid, tcon, + pSMBFile->fid.netfid, + &ExtAttrBits, &ExtAttrMask); if (rc == 0) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, @@ -94,8 +95,12 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EFAULT; break; } - /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, - extAttrBits, &ExtAttrMask);*/ + /* + * rc = CIFSGetExtAttr(xid, tcon, + * pSMBFile->fid.netfid, + * extAttrBits, + * &ExtAttrMask); + */ } cFYI(1, "set flags not implemented yet"); break; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index ce41fee07e5b..a921b0712eff 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -466,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) list_for_each(tmp2, &tcon->openFileList) { netfile = list_entry(tmp2, struct cifsFileInfo, tlist); - if (pSMB->Fid != netfile->netfid) + if (pSMB->Fid != netfile->fid.netfid) continue; cFYI(1, "file id match, oplock break"); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d87f82678bc7..9e76e3b3289b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -279,7 +279,7 @@ ffirst_retry: search_flags |= CIFS_SEARCH_BACKUP_SEARCH; rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls, - &cifsFile->netfid, search_flags, &cifsFile->srch_inf, + &cifsFile->fid.netfid, search_flags, &cifsFile->srch_inf, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); if (rc == 0) @@ -545,7 +545,7 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, !cifsFile->invalidHandle) { cifsFile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - CIFSFindClose(xid, pTcon, cifsFile->netfid); + CIFSFindClose(xid, pTcon, cifsFile->fid.netfid); } else spin_unlock(&cifs_file_list_lock); if (cifsFile->srch_inf.ntwrk_buf_start) { @@ -577,8 +577,8 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && (rc == 0) && !cifsFile->srch_inf.endOfSearch) { cFYI(1, "calling findnext2"); - rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags, - &cifsFile->srch_inf); + rc = CIFSFindNext(xid, pTcon, cifsFile->fid.netfid, + search_flags, &cifsFile->srch_inf); /* FindFirst/Next set last_entry to NULL on malformed reply */ if (cifsFile->srch_inf.last_entry) cifs_save_resume_key(cifsFile->srch_inf.last_entry, @@ -781,7 +781,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } } /* else { cifsFile->invalidHandle = true; - CIFSFindClose(xid, pTcon, cifsFile->netfid); + CIFSFindClose(xid, pTcon, cifsFile->fid.netfid); } */ pTcon = tlink_tcon(cifsFile->tlink); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 725fa6195867..b170da0a882d 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -63,7 +63,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf, static bool cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) { - return ob1->netfid == ob2->netfid; + return ob1->fid.netfid == ob2->fid.netfid; } static unsigned int -- cgit v1.2.3 From fb1214e48f735cdb68446adb77ec37aa3de60697 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:26 -0700 Subject: CIFS: Move open code to ops struct Acked-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 7 ++++++ fs/cifs/cifsproto.h | 7 +++--- fs/cifs/dir.c | 17 ++++++++------- fs/cifs/file.c | 63 +++++++++++++++++++++++++---------------------------- fs/cifs/smb1ops.c | 29 ++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a2a3865dee1b..e649fac7d6fe 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -171,6 +171,7 @@ struct cifs_tcon; struct dfs_info3_param; struct cifs_fattr; struct smb_vol; +struct cifs_fid; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -262,6 +263,12 @@ struct smb_version_operations { /* open, rename and delete file */ int (*rename_pending_delete)(const char *, struct dentry *, const unsigned int); + /* open a file for non-posix mounts */ + int (*open)(const unsigned int, struct cifs_tcon *, const char *, int, + int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *, + struct cifs_sb_info *); + /* set fid protocol-specific info */ + void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 79c088469757..b67f7758f422 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -121,9 +121,10 @@ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset); extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); -extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, - struct file *file, struct tcon_link *tlink, - __u32 oplock); +extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, + struct file *file, + struct tcon_link *tlink, + __u32 oplock); extern int cifs_posix_open(char *full_path, struct inode **inode, struct super_block *sb, int mode, unsigned int f_flags, __u32 *oplock, __u16 *netfid, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 781025be48bc..70823dc4f960 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -377,11 +377,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, unsigned int xid; struct tcon_link *tlink; struct cifs_tcon *tcon; - __u16 fileHandle; + struct cifs_fid fid; __u32 oplock; - struct cifsFileInfo *pfile_info; + struct cifsFileInfo *file_info; - /* Posix open is only called (at lookup time) for file create now. For + /* + * Posix open is only called (at lookup time) for file create now. For * opens (rather than creates), because we do not know if it is a file * or directory yet, and current Samba no longer allows us to do posix * open on dirs, we could end up wasting an open call on what turns out @@ -415,20 +416,20 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, tcon = tlink_tcon(tlink); rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fileHandle, opened); + &oplock, &fid.netfid, opened); if (rc) goto out; rc = finish_open(file, direntry, generic_file_open, opened); if (rc) { - CIFSSMBClose(xid, tcon, fileHandle); + CIFSSMBClose(xid, tcon, fid.netfid); goto out; } - pfile_info = cifs_new_fileinfo(fileHandle, file, tlink, oplock); - if (pfile_info == NULL) { - CIFSSMBClose(xid, tcon, fileHandle); + file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); + if (file_info == NULL) { + CIFSSMBClose(xid, tcon, fid.netfid); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 712f2a4d0d49..1246cf74e1fb 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -169,16 +169,19 @@ posix_open_ret: static int cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, - struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock, - __u16 *pnetfid, unsigned int xid) + struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, + struct cifs_fid *fid, unsigned int xid) { int rc; - int desiredAccess; + int desired_access; int disposition; int create_options = CREATE_NOT_DIR; FILE_ALL_INFO *buf; - desiredAccess = cifs_convert_flags(f_flags); + if (!tcon->ses->server->ops->open) + return -ENOSYS; + + desired_access = cifs_convert_flags(f_flags); /********************************************************************* * open flag mapping table: @@ -215,16 +218,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - if (tcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBOpen(xid, tcon, full_path, disposition, - desiredAccess, create_options, pnetfid, poplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags - & CIFS_MOUNT_MAP_SPECIAL_CHR); - else - rc = SMBLegacyOpen(xid, tcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags - & CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition, + desired_access, create_options, fid, + oplock, buf, cifs_sb); if (rc) goto out; @@ -234,7 +230,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, xid); else rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, - xid, pnetfid); + xid, &fid->netfid); out: kfree(buf); @@ -242,7 +238,7 @@ out: } struct cifsFileInfo * -cifs_new_fileinfo(__u16 fileHandle, struct file *file, +cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) { struct dentry *dentry = file->f_path.dentry; @@ -255,7 +251,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, return cfile; cfile->count = 1; - cfile->fid.netfid = fileHandle; cfile->pid = current->tgid; cfile->uid = current_fsuid(); cfile->dentry = dget(dentry); @@ -265,6 +260,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, mutex_init(&cfile->fh_mutex); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); INIT_LIST_HEAD(&cfile->llist); + tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); spin_lock(&cifs_file_list_lock); list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList)); @@ -275,9 +271,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, list_add_tail(&cfile->flist, &cinode->openFileList); spin_unlock(&cifs_file_list_lock); - cifs_set_oplock_level(cinode, oplock); - cinode->can_cache_brlcks = cinode->clientCanCacheAll; - file->private_data = cfile; return cfile; } @@ -364,10 +357,10 @@ int cifs_open(struct inode *inode, struct file *file) struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; struct tcon_link *tlink; - struct cifsFileInfo *pCifsFile = NULL; + struct cifsFileInfo *cfile = NULL; char *full_path = NULL; bool posix_open_ok = false; - __u16 netfid; + struct cifs_fid fid; xid = get_xid(); @@ -399,7 +392,7 @@ int cifs_open(struct inode *inode, struct file *file) /* can not refresh inode info since size could be stale */ rc = cifs_posix_open(full_path, &inode, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, - file->f_flags, &oplock, &netfid, xid); + file->f_flags, &oplock, &fid.netfid, xid); if (rc == 0) { cFYI(1, "posix open succeeded"); posix_open_ok = true; @@ -415,20 +408,22 @@ int cifs_open(struct inode *inode, struct file *file) } else if ((rc != -EIO) && (rc != -EREMOTE) && (rc != -EOPNOTSUPP)) /* path not found or net err */ goto out; - /* else fallthrough to retry open the old way on network i/o - or DFS errors */ + /* + * Else fallthrough to retry open the old way on network i/o + * or DFS errors. + */ } if (!posix_open_ok) { rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, - file->f_flags, &oplock, &netfid, xid); + file->f_flags, &oplock, &fid, xid); if (rc) goto out; } - pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); - if (pCifsFile == NULL) { - CIFSSMBClose(xid, tcon, netfid); + cfile = cifs_new_fileinfo(&fid, file, tlink, oplock); + if (cfile == NULL) { + CIFSSMBClose(xid, tcon, fid.netfid); rc = -ENOMEM; goto out; } @@ -436,8 +431,10 @@ int cifs_open(struct inode *inode, struct file *file) cifs_fscache_set_inode_cookie(inode, file); if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { - /* time to set mode which we can not set earlier due to - problems creating new read-only files */ + /* + * Time to set mode which we can not set earlier due to + * problems creating new read-only files. + */ struct cifs_unix_set_info_args args = { .mode = inode->i_mode, .uid = NO_CHANGE_64, @@ -447,8 +444,8 @@ int cifs_open(struct inode *inode, struct file *file) .mtime = NO_CHANGE_64, .device = 0, }; - CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid, - pCifsFile->pid); + CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, + cfile->pid); } out: diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index b170da0a882d..907b30865000 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -607,6 +607,33 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, cifsInode->cifsAttrs = dosattrs; } +static int +cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, + int disposition, int desired_access, int create_options, + struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, + struct cifs_sb_info *cifs_sb) +{ + if (!(tcon->ses->capabilities & CAP_NT_SMBS)) + return SMBLegacyOpen(xid, tcon, path, disposition, + desired_access, CREATE_NOT_DIR, + &fid->netfid, oplock, buf, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags + & CIFS_MOUNT_MAP_SPECIAL_CHR); + return CIFSSMBOpen(xid, tcon, path, disposition, desired_access, + create_options, &fid->netfid, oplock, buf, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); +} + +static void +cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) +{ + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + cfile->fid.netfid = fid->netfid; + cifs_set_oplock_level(cinode, oplock); + cinode->can_cache_brlcks = cinode->clientCanCacheAll; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -646,6 +673,8 @@ struct smb_version_operations smb1_operations = { .rmdir = CIFSSMBRmDir, .unlink = CIFSSMBDelFile, .rename_pending_delete = cifs_rename_pending_delete, + .open = cifs_open_file, + .set_fid = cifs_set_fid, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 0ff78a221bf7839a7f20be9929433d17e868e987 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:26 -0700 Subject: CIFS: Move close code to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/file.c | 10 +++++++--- fs/cifs/smb1ops.c | 8 ++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e649fac7d6fe..39bf2f3e866a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -269,6 +269,8 @@ struct smb_version_operations { struct cifs_sb_info *); /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); + /* close a file */ + int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); }; struct smb_version_values { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1246cf74e1fb..14938ee4f6e4 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -326,10 +326,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) cancel_work_sync(&cifs_file->oplock_break); if (!tcon->need_reconnect && !cifs_file->invalidHandle) { + struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; - int rc; + int rc = -ENOSYS; + xid = get_xid(); - rc = CIFSSMBClose(xid, tcon, cifs_file->fid.netfid); + if (server->ops->close) + rc = server->ops->close(xid, tcon, &cifs_file->fid); free_xid(xid); } @@ -423,7 +426,8 @@ int cifs_open(struct inode *inode, struct file *file) cfile = cifs_new_fileinfo(&fid, file, tlink, oplock); if (cfile == NULL) { - CIFSSMBClose(xid, tcon, fid.netfid); + if (tcon->ses->server->ops->close) + tcon->ses->server->ops->close(xid, tcon, &fid); rc = -ENOMEM; goto out; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 907b30865000..bb758476e139 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -634,6 +634,13 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = cinode->clientCanCacheAll; } +static int +cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return CIFSSMBClose(xid, tcon, fid->netfid); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -675,6 +682,7 @@ struct smb_version_operations smb1_operations = { .rename_pending_delete = cifs_rename_pending_delete, .open = cifs_open_file, .set_fid = cifs_set_fid, + .close = cifs_close_file, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From f0df737ee820ec62055baf2b28e24db4fb1ad71d Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:26 -0700 Subject: CIFS: Add open/close file support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Makefile | 2 +- fs/cifs/cifsglob.h | 4 +++ fs/cifs/smb2file.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2inode.c | 4 +-- fs/cifs/smb2ops.c | 22 +++++++++++++- fs/cifs/smb2pdu.c | 53 +++++++++++++++++++++++++-------- fs/cifs/smb2pdu.h | 4 +++ fs/cifs/smb2proto.h | 14 ++++++++- 8 files changed, 172 insertions(+), 17 deletions(-) create mode 100644 fs/cifs/smb2file.c (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index feee94309271..aa0d68b086eb 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -17,4 +17,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \ - smb2misc.o smb2pdu.o smb2inode.o + smb2misc.o smb2pdu.o smb2inode.o smb2file.o diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 39bf2f3e866a..8a69dae81d3a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -757,6 +757,10 @@ struct cifs_search_info { struct cifs_fid { __u16 netfid; +#ifdef CONFIG_CIFS_SMB2 + __u64 persistent_fid; /* persist file id for smb2 */ + __u64 volatile_fid; /* volatile file id for smb2 */ +#endif }; struct cifsFileInfo { diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c new file mode 100644 index 000000000000..a7618dfb7712 --- /dev/null +++ b/fs/cifs/smb2file.c @@ -0,0 +1,86 @@ +/* + * fs/cifs/smb2file.c + * + * Copyright (C) International Business Machines Corp., 2002, 2011 + * Author(s): Steve French (sfrench@us.ibm.com), + * Pavel Shilovsky ((pshilovsky@samba.org) 2012 + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include "cifsfs.h" +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "cifs_fs_sb.h" +#include "cifs_unicode.h" +#include "fscache.h" +#include "smb2proto.h" + +int +smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, + int disposition, int desired_access, int create_options, + struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, + struct cifs_sb_info *cifs_sb) +{ + int rc; + __le16 *smb2_path; + struct smb2_file_all_info *smb2_data = NULL; + + smb2_path = cifs_convert_path_to_utf16(path, cifs_sb); + if (smb2_path == NULL) { + rc = -ENOMEM; + goto out; + } + + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + GFP_KERNEL); + if (smb2_data == NULL) { + rc = -ENOMEM; + goto out; + } + + desired_access |= FILE_READ_ATTRIBUTES; + + rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid, + &fid->volatile_fid, desired_access, disposition, + 0, 0, smb2_data); + if (rc) + goto out; + + if (buf) { + /* open response does not have IndexNumber field - get it */ + rc = SMB2_get_srv_num(xid, tcon, fid->persistent_fid, + fid->volatile_fid, + &smb2_data->IndexNumber); + if (rc) { + /* let get_inode_info disable server inode numbers */ + smb2_data->IndexNumber = 0; + rc = 0; + } + move_smb2_info_to_cifs(buf, smb2_data); + } + +out: + *oplock = 0; + kfree(smb2_data); + kfree(smb2_path); + return rc; +} diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 02a9bda4248c..ee3a1ef686dd 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -54,7 +54,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, desired_access, create_disposition, file_attributes, - create_options); + create_options, NULL); if (rc) { kfree(utf16_path); return rc; @@ -86,7 +86,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, return rc; } -static void +void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) { memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index bf9b318cfa4b..eba12b31dc60 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -170,7 +170,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, return -ENOMEM; rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0); + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, NULL); if (rc) { kfree(utf16_path); return rc; @@ -292,6 +292,23 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) #endif } +static void +smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) +{ + /* struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); */ + cfile->fid.persistent_fid = fid->persistent_fid; + cfile->fid.volatile_fid = fid->volatile_fid; + /* cifs_set_oplock_level(cinode, oplock); */ + /* cinode->can_cache_brlcks = cinode->clientCanCacheAll; */ +} + +static int +smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -322,6 +339,9 @@ struct smb_version_operations smb21_operations = { .mkdir_setinfo = smb2_mkdir_setinfo, .rmdir = smb2_rmdir, .unlink = smb2_unlink, + .open = smb2_open_file, + .set_fid = smb2_set_fid, + .close = smb2_close_file, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 62b3f17d0613..231e9701ab85 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -833,7 +833,8 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, - __u32 create_disposition, __u32 file_attributes, __u32 create_options) + __u32 create_disposition, __u32 file_attributes, __u32 create_options, + struct smb2_file_all_info *buf) { struct smb2_create_req *req; struct smb2_create_rsp *rsp; @@ -856,9 +857,9 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, if (rc) return rc; - if (enable_oplocks) + /* if (server->oplocks) req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH; - else + else */ req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; req->ImpersonationLevel = IL_IMPERSONATION; req->DesiredAccess = cpu_to_le32(desired_access); @@ -906,6 +907,15 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, } *persistent_fid = rsp->PersistentFileId; *volatile_fid = rsp->VolatileFileId; + + if (buf) { + memcpy(buf, &rsp->CreationTime, 32); + buf->AllocationSize = rsp->AllocationSize; + buf->EndOfFile = rsp->EndofFile; + buf->Attributes = rsp->FileAttributes; + buf->NumberOfLinks = cpu_to_le32(1); + buf->DeletePending = 0; + } creat_exit: free_rsp_buf(resp_buftype, rsp); return rc; @@ -1019,10 +1029,10 @@ validate_and_copy_buf(unsigned int offset, unsigned int buffer_length, return 0; } -int -SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb2_file_all_info *data) +static int +query_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u8 info_class, + size_t output_len, size_t min_len, void *data) { struct smb2_query_info_req *req; struct smb2_query_info_rsp *rsp = NULL; @@ -1044,14 +1054,13 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, return rc; req->InfoType = SMB2_O_INFO_FILE; - req->FileInfoClass = FILE_ALL_INFORMATION; + req->FileInfoClass = info_class; req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; /* 4 for rfc1002 length field and 1 for Buffer */ req->InputBufferOffset = cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); - req->OutputBufferLength = - cpu_to_le32(sizeof(struct smb2_file_all_info) + MAX_NAME * 2); + req->OutputBufferLength = cpu_to_le32(output_len); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ @@ -1067,14 +1076,34 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), le32_to_cpu(rsp->OutputBufferLength), - &rsp->hdr, sizeof(struct smb2_file_all_info), - (char *)data); + &rsp->hdr, min_len, data); qinf_exit: free_rsp_buf(resp_buftype, rsp); return rc; } +int +SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + struct smb2_file_all_info *data) +{ + return query_info(xid, tcon, persistent_fid, volatile_fid, + FILE_ALL_INFORMATION, + sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + sizeof(struct smb2_file_all_info), data); +} + +int +SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid) +{ + return query_info(xid, tcon, persistent_fid, volatile_fid, + FILE_INTERNAL_INFORMATION, + sizeof(struct smb2_file_internal_info), + sizeof(struct smb2_file_internal_info), uniqueid); +} + /* * This is a no-op for now. We're not really interested in the reply, but * rather in the fact that the server sent one and that server->lstrp diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 15dc8eea8273..0e962cbf8166 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -548,6 +548,10 @@ struct smb2_query_info_rsp { #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 #define FILE_STANDARD_LINK_INFORMATION 54 +struct smb2_file_internal_info { + __le64 IndexNumber; +} __packed; /* level 6 Query */ + /* * This level 18, although with struct with same name is different from cifs * level 0x107. Level 0x107 has an extra u64 between AccessFlags and diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index f4ac72799f70..624d344e1a57 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -48,6 +48,8 @@ extern int smb2_setup_async_request(struct TCP_Server_Info *server, struct mid_q_entry **ret_mid); extern void smb2_echo_request(struct work_struct *work); +extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, + struct smb2_file_all_info *src); extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, @@ -62,6 +64,12 @@ extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); +extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, int disposition, + int desired_access, int create_options, + struct cifs_fid *fid, __u32 *oplock, + FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb); + /* * SMB2 Worker functions - most of protocol specific implementation details * are contained within these calls. @@ -77,12 +85,16 @@ extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, __u32 create_disposition, - __u32 file_attributes, __u32 create_options); + __u32 file_attributes, __u32 create_options, + struct smb2_file_all_info *buf); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct smb2_file_all_info *data); +extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + __le64 *uniqueid); extern int SMB2_echo(struct TCP_Server_Info *server); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 4ad6504453644f57f7b6cf9c7bfc9d1372c5ad15 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:26 -0700 Subject: CIFS: Move guery file info code to ops struct and make cifs_get_file_info(_unix) calls static. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifsproto.h | 2 -- fs/cifs/inode.c | 12 +++++++++--- fs/cifs/smb1ops.c | 8 ++++++++ 4 files changed, 20 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8a69dae81d3a..500ecb921b85 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -236,6 +236,9 @@ struct smb_version_operations { int (*query_path_info)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, FILE_ALL_INFO *, bool *); + /* query file data from the server */ + int (*query_file_info)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *, FILE_ALL_INFO *); /* get server index number */ int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b67f7758f422..2c6ad78a16cc 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -137,11 +137,9 @@ extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); extern struct inode *cifs_iget(struct super_block *sb, struct cifs_fattr *fattr); -extern int cifs_get_file_info(struct file *filp); extern int cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, int xid, const __u16 *fid); -extern int cifs_get_file_info_unix(struct file *filp); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, struct super_block *sb, unsigned int xid); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ea7428a82a31..c6f6b02cf3b5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -282,7 +282,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; } -int cifs_get_file_info_unix(struct file *filp) +static int +cifs_get_file_info_unix(struct file *filp) { int rc; unsigned int xid; @@ -550,7 +551,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_gid = cifs_sb->mnt_gid; } -int cifs_get_file_info(struct file *filp) +static int +cifs_get_file_info(struct file *filp) { int rc; unsigned int xid; @@ -560,9 +562,13 @@ int cifs_get_file_info(struct file *filp) struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + + if (!server->ops->query_file_info) + return -ENOSYS; xid = get_xid(); - rc = CIFSSMBQFileInfo(xid, tcon, cfile->fid.netfid, &find_data); + rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index bb758476e139..cbbc122a501a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -489,6 +489,13 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, CIFS_MOUNT_MAP_SPECIAL_CHR); } +static int +cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, FILE_ALL_INFO *data) +{ + return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data); +} + static char * cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon) @@ -672,6 +679,7 @@ struct smb_version_operations smb1_operations = { .qfs_tcon = cifs_qfs_tcon, .is_path_accessible = cifs_is_path_accessible, .query_path_info = cifs_query_path_info, + .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, .build_path_to_root = cifs_build_path_to_root, .echo = CIFSSMBEcho, -- cgit v1.2.3 From b7546bc54c4acf1a79c83030fa0591cd24875125 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:27 -0700 Subject: CIFS: Add SMB2 support for query_file_info Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index eba12b31dc60..0fd580168252 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -190,6 +190,26 @@ smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, return 0; } +static int +smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, FILE_ALL_INFO *data) +{ + int rc; + struct smb2_file_all_info *smb2_data; + + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + GFP_KERNEL); + if (smb2_data == NULL) + return -ENOMEM; + + rc = SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid, + smb2_data); + if (!rc) + move_smb2_info_to_cifs(data, smb2_data); + kfree(smb2_data); + return rc; +} + static char * smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon) @@ -334,6 +354,7 @@ struct smb_version_operations smb21_operations = { .echo = SMB2_echo, .query_path_info = smb2_query_path_info, .get_srv_inum = smb2_get_srv_inum, + .query_file_info = smb2_query_file_info, .build_path_to_root = smb2_build_path_to_root, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, -- cgit v1.2.3 From 253641388a49259f6bfefecfb14fa057ca58dc21 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:27 -0700 Subject: CIFS: Move create code use ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/dir.c | 95 ++++++++++++++++++++++++++++++------------------------- fs/cifs/file.c | 10 +++--- fs/cifs/smb1ops.c | 2 +- 3 files changed, 58 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 70823dc4f960..b99a1670dad4 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -160,17 +160,18 @@ check_name(struct dentry *direntry) static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, struct tcon_link *tlink, unsigned oflags, umode_t mode, - __u32 *oplock, __u16 *fileHandle, int *created) + __u32 *oplock, struct cifs_fid *fid, int *created) { int rc = -ENOENT; int create_options = CREATE_NOT_DIR; - int desiredAccess; + int desired_access; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = tlink_tcon(tlink); char *full_path = NULL; FILE_ALL_INFO *buf = NULL; struct inode *newinode = NULL; int disposition; + struct TCP_Server_Info *server = tcon->ses->server; *oplock = 0; if (tcon->ses->server->oplocks) @@ -185,8 +186,8 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { - rc = cifs_posix_open(full_path, &newinode, - inode->i_sb, mode, oflags, oplock, fileHandle, xid); + rc = cifs_posix_open(full_path, &newinode, inode->i_sb, mode, + oflags, oplock, &fid->netfid, xid); switch (rc) { case 0: if (newinode == NULL) { @@ -202,7 +203,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, * close it and proceed as if it were a normal * lookup. */ - CIFSSMBClose(xid, tcon, *fileHandle); + CIFSSMBClose(xid, tcon, fid->netfid); goto cifs_create_get_file_info; } /* success, no need to query */ @@ -244,11 +245,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, */ } - desiredAccess = 0; + desired_access = 0; if (OPEN_FMODE(oflags) & FMODE_READ) - desiredAccess |= GENERIC_READ; /* is this too little? */ + desired_access |= GENERIC_READ; /* is this too little? */ if (OPEN_FMODE(oflags) & FMODE_WRITE) - desiredAccess |= GENERIC_WRITE; + desired_access |= GENERIC_WRITE; disposition = FILE_OVERWRITE_IF; if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -260,8 +261,15 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, else cFYI(1, "Create flag not set in create function"); - /* BB add processing to set equivalent of mode - e.g. via CreateX with - ACLs */ + /* + * BB add processing to set equivalent of mode - e.g. via CreateX with + * ACLs + */ + + if (!server->ops->open) { + rc = -ENOSYS; + goto out; + } buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { @@ -279,28 +287,18 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - if (tcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBOpen(xid, tcon, full_path, disposition, - desiredAccess, create_options, - fileHandle, oplock, buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - else - rc = -EIO; /* no NT SMB support fall into legacy open below */ - - if (rc == -EIO) { - /* old server, retry the open legacy style */ - rc = SMBLegacyOpen(xid, tcon, full_path, disposition, - desiredAccess, create_options, - fileHandle, oplock, buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } + rc = server->ops->open(xid, tcon, full_path, disposition, + desired_access, create_options, fid, oplock, + buf, cifs_sb); if (rc) { cFYI(1, "cifs_create returned 0x%x", rc); goto out; } - /* If Open reported that we actually created a file - then we now have to set the mode if possible */ + /* + * If Open reported that we actually created a file then we now have to + * set the mode if possible. + */ if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { struct cifs_unix_set_info_args args = { .mode = mode, @@ -321,11 +319,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle, - current->tgid); + CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid, + current->tgid); } else { - /* BB implement mode setting via Windows security - descriptors e.g. */ + /* + * BB implement mode setting via Windows security + * descriptors e.g. + */ /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/ /* Could set r/o dos attribute if mode & 0222 == 0 */ @@ -334,11 +334,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, cifs_create_get_file_info: /* server might mask mode so we have to query for it */ if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb, xid); + rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, + xid); else { - rc = cifs_get_inode_info(&newinode, full_path, buf, - inode->i_sb, xid, fileHandle); + rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb, + xid, &fid->netfid); if (newinode) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) newinode->i_mode = mode; @@ -356,7 +356,8 @@ cifs_create_get_file_info: cifs_create_set_dentry: if (rc != 0) { cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); - CIFSSMBClose(xid, tcon, *fileHandle); + if (server->ops->close) + server->ops->close(xid, tcon, fid); goto out; } d_drop(direntry); @@ -377,6 +378,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, unsigned int xid; struct tcon_link *tlink; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct cifs_fid fid; __u32 oplock; struct cifsFileInfo *file_info; @@ -414,22 +416,25 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, goto out_free_xid; tcon = tlink_tcon(tlink); + server = tcon->ses->server; rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fid.netfid, opened); + &oplock, &fid, opened); if (rc) goto out; rc = finish_open(file, direntry, generic_file_open, opened); if (rc) { - CIFSSMBClose(xid, tcon, fid.netfid); + if (server->ops->close) + server->ops->close(xid, tcon, &fid); goto out; } file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); if (file_info == NULL) { - CIFSSMBClose(xid, tcon, fid.netfid); + if (server->ops->close) + server->ops->close(xid, tcon, &fid); rc = -ENOMEM; } @@ -454,7 +459,9 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, */ unsigned oflags = O_EXCL | O_CREAT | O_RDWR; struct tcon_link *tlink; - __u16 fileHandle; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + struct cifs_fid fid; __u32 oplock; int created = FILE_CREATED; @@ -467,9 +474,11 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, goto out_free_xid; rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fileHandle, &created); - if (!rc) - CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle); + &oplock, &fid, &created); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + if (!rc && server->ops->close) + server->ops->close(xid, tcon, &fid); cifs_put_tlink(tlink); out_free_xid: diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 14938ee4f6e4..dbca2b293e55 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -312,13 +312,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) if (list_empty(&cifsi->openFileList)) { cFYI(1, "closing last open instance for inode %p", cifs_file->dentry->d_inode); - - /* in strict cache mode we need invalidate mapping on the last - close because it may cause a error when we open this file - again and get at least level II oplock */ + /* + * In strict cache mode we need invalidate mapping on the last + * close because it may cause a error when we open this file + * again and get at least level II oplock. + */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) CIFS_I(inode)->invalid_mapping = true; - cifs_set_oplock_level(cifsi, 0); } spin_unlock(&cifs_file_list_lock); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index cbbc122a501a..dd64754ed8cb 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -622,7 +622,7 @@ cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, { if (!(tcon->ses->capabilities & CAP_NT_SMBS)) return SMBLegacyOpen(xid, tcon, path, disposition, - desired_access, CREATE_NOT_DIR, + desired_access, create_options, &fid->netfid, oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); -- cgit v1.2.3 From 2ae78ba85cd7b4c3a164434ab1f6c9e515c6fa1d Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:27 -0700 Subject: CIFS: Move reopen code to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 121 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 64 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index dbca2b293e55..628ee17007f8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -459,59 +459,66 @@ out: return rc; } -/* Try to reacquire byte range locks that were released when session */ -/* to server was lost */ +/* + * Try to reacquire byte range locks that were released when session + * to server was lost + */ static int cifs_relock_file(struct cifsFileInfo *cifsFile) { int rc = 0; -/* BB list all locks open on this file and relock */ + /* BB list all locks open on this file and relock */ return rc; } -static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) +static int +cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) { int rc = -EACCES; unsigned int xid; __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - struct cifsInodeInfo *pCifsInode; + struct TCP_Server_Info *server; + struct cifsInodeInfo *cinode; struct inode *inode; char *full_path = NULL; - int desiredAccess; + int desired_access; int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; - __u16 netfid; + struct cifs_fid fid; xid = get_xid(); - mutex_lock(&pCifsFile->fh_mutex); - if (!pCifsFile->invalidHandle) { - mutex_unlock(&pCifsFile->fh_mutex); + mutex_lock(&cfile->fh_mutex); + if (!cfile->invalidHandle) { + mutex_unlock(&cfile->fh_mutex); rc = 0; free_xid(xid); return rc; } - inode = pCifsFile->dentry->d_inode; + inode = cfile->dentry->d_inode; cifs_sb = CIFS_SB(inode->i_sb); - tcon = tlink_tcon(pCifsFile->tlink); + tcon = tlink_tcon(cfile->tlink); + server = tcon->ses->server; -/* can not grab rename sem here because various ops, including - those that already have the rename sem can end up causing writepage - to get called and if the server was down that means we end up here, - and we can never tell if the caller already has the rename_sem */ - full_path = build_path_from_dentry(pCifsFile->dentry); + /* + * Can not grab rename sem here because various ops, including those + * that already have the rename sem can end up causing writepage to get + * called and if the server was down that means we end up here, and we + * can never tell if the caller already has the rename_sem. + */ + full_path = build_path_from_dentry(cfile->dentry); if (full_path == NULL) { rc = -ENOMEM; - mutex_unlock(&pCifsFile->fh_mutex); + mutex_unlock(&cfile->fh_mutex); free_xid(xid); return rc; } - cFYI(1, "inode = 0x%p file flags 0x%x for %s", - inode, pCifsFile->f_flags, full_path); + cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags, + full_path); if (tcon->ses->server->oplocks) oplock = REQ_OPLOCK; @@ -525,69 +532,69 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) * O_CREAT, O_EXCL and O_TRUNC already had their effect on the * original open. Must mask them off for a reopen. */ - unsigned int oflags = pCifsFile->f_flags & + unsigned int oflags = cfile->f_flags & ~(O_CREAT | O_EXCL | O_TRUNC); rc = cifs_posix_open(full_path, NULL, inode->i_sb, - cifs_sb->mnt_file_mode /* ignored */, - oflags, &oplock, &netfid, xid); + cifs_sb->mnt_file_mode /* ignored */, + oflags, &oplock, &fid.netfid, xid); if (rc == 0) { cFYI(1, "posix reopen succeeded"); goto reopen_success; } - /* fallthrough to retry open the old way on errors, especially - in the reconnect path it is important to retry hard */ + /* + * fallthrough to retry open the old way on errors, especially + * in the reconnect path it is important to retry hard + */ } - desiredAccess = cifs_convert_flags(pCifsFile->f_flags); + desired_access = cifs_convert_flags(cfile->f_flags); if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - /* Can not refresh inode by passing in file_info buf to be returned - by SMBOpen and then calling get_inode_info with returned buf - since file might have write behind data that needs to be flushed - and server version of file size can be stale. If we knew for sure - that inode was not dirty locally we could do this */ - - rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, - create_options, &netfid, &oplock, NULL, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + /* + * Can not refresh inode by passing in file_info buf to be returned by + * CIFSSMBOpen and then calling get_inode_info with returned buf since + * file might have write behind data that needs to be flushed and server + * version of file size can be stale. If we knew for sure that inode was + * not dirty locally we could do this. + */ + rc = server->ops->open(xid, tcon, full_path, disposition, + desired_access, create_options, &fid, &oplock, + NULL, cifs_sb); if (rc) { - mutex_unlock(&pCifsFile->fh_mutex); - cFYI(1, "cifs_open returned 0x%x", rc); + mutex_unlock(&cfile->fh_mutex); + cFYI(1, "cifs_reopen returned 0x%x", rc); cFYI(1, "oplock: %d", oplock); goto reopen_error_exit; } reopen_success: - pCifsFile->fid.netfid = netfid; - pCifsFile->invalidHandle = false; - mutex_unlock(&pCifsFile->fh_mutex); - pCifsInode = CIFS_I(inode); + cfile->invalidHandle = false; + mutex_unlock(&cfile->fh_mutex); + cinode = CIFS_I(inode); if (can_flush) { rc = filemap_write_and_wait(inode->i_mapping); mapping_set_error(inode->i_mapping, rc); if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&inode, - full_path, inode->i_sb, xid); + rc = cifs_get_inode_info_unix(&inode, full_path, + inode->i_sb, xid); else - rc = cifs_get_inode_info(&inode, - full_path, NULL, inode->i_sb, - xid, NULL); - } /* else we are writing out data to server already - and could deadlock if we tried to flush data, and - since we do not know if we have data that would - invalidate the current end of file on the server - we can not go to the server to get the new inod - info */ - - cifs_set_oplock_level(pCifsInode, oplock); - - cifs_relock_file(pCifsFile); + rc = cifs_get_inode_info(&inode, full_path, NULL, + inode->i_sb, xid, NULL); + } + /* + * Else we are writing out data to server already and could deadlock if + * we tried to flush data, and since we do not know if we have data that + * would invalidate the current end of file on the server we can not go + * to the server to get the new inode info. + */ + + server->ops->set_fid(cfile, &fid, oplock); + cifs_relock_file(cfile); reopen_error_exit: kfree(full_path); -- cgit v1.2.3 From 1d8c4c0009deda22b436b1f0ab9f2885863717fc Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:27 -0700 Subject: CIFS: Make flush code use ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/file.c | 20 ++++++++++++++++---- fs/cifs/smb1ops.c | 8 ++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 500ecb921b85..abb831019039 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -274,6 +274,8 @@ struct smb_version_operations { void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); + /* send a flush request to the server */ + int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); }; struct smb_version_values { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 628ee17007f8..aa1dccf0df9e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2062,6 +2062,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, unsigned int xid; int rc = 0; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; struct inode *inode = file->f_path.dentry->d_inode; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -2085,8 +2086,13 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, } tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) - rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + server = tcon->ses->server; + if (server->ops->flush) + rc = server->ops->flush(xid, tcon, &smbfile->fid); + else + rc = -ENOSYS; + } free_xid(xid); mutex_unlock(&inode->i_mutex); @@ -2098,6 +2104,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) unsigned int xid; int rc = 0; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); struct inode *inode = file->f_mapping->host; @@ -2113,8 +2120,13 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) file->f_path.dentry->d_name.name, datasync); tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) - rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + server = tcon->ses->server; + if (server->ops->flush) + rc = server->ops->flush(xid, tcon, &smbfile->fid); + else + rc = -ENOSYS; + } free_xid(xid); mutex_unlock(&inode->i_mutex); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index dd64754ed8cb..df20dd9e64ca 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -648,6 +648,13 @@ cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, return CIFSSMBClose(xid, tcon, fid->netfid); } +static int +cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return CIFSSMBFlush(xid, tcon, fid->netfid); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -691,6 +698,7 @@ struct smb_version_operations smb1_operations = { .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, + .flush = cifs_flush_file, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 7a5cfb1965854132f2f382eade8c6ce2eeb6f692 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:28 -0700 Subject: CIFS: Add SMB2 support for flush Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 8 ++++++++ fs/cifs/smb2pdu.c | 38 ++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 15 +++++++++++++++ fs/cifs/smb2proto.h | 2 ++ 4 files changed, 63 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0fd580168252..d81e1daeb8f0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -329,6 +329,13 @@ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } +static int +smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return SMB2_flush(xid, tcon, fid->persistent_fid, fid->volatile_fid); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -363,6 +370,7 @@ struct smb_version_operations smb21_operations = { .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, + .flush = smb2_flush_file, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 231e9701ab85..ff374063f4e2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1152,3 +1152,41 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_small_buf_release(req); return rc; } + +int +SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, + u64 volatile_fid) +{ + struct smb2_flush_req *req; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + struct kvec iov[1]; + int resp_buftype; + int rc = 0; + + cFYI(1, "Flush"); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req); + if (rc) + return rc; + + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); + + if ((rc != 0) && tcon) + cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); + + free_rsp_buf(resp_buftype, iov[0].iov_base); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 0e962cbf8166..f5bf63f66971 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -453,6 +453,21 @@ struct smb2_close_rsp { __le32 Attributes; } __packed; +struct smb2_flush_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 24 */ + __le16 Reserved1; + __le32 Reserved2; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ +} __packed; + +struct smb2_flush_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; + __le16 Reserved; +} __packed; + struct smb2_echo_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 624d344e1a57..51e6cd185c79 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -89,6 +89,8 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_file_all_info *buf); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); +extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct smb2_file_all_info *data); -- cgit v1.2.3 From 24985c53d5b04a56ac7c8ae7f74b8cb807e2ed2f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:28 -0700 Subject: CIFS: Move r/wsize negotiating to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 61 +++++++++++++++++++++++ fs/cifs/connect.c | 144 +---------------------------------------------------- fs/cifs/smb1ops.c | 86 ++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 142 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index abb831019039..e5cb1941e251 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -213,6 +213,10 @@ struct smb_version_operations { bool (*need_neg)(struct TCP_Server_Info *); /* negotiate to the server */ int (*negotiate)(const unsigned int, struct cifs_ses *); + /* set negotiated write size */ + unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *); + /* set negotiated read size */ + unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *); /* setup smb sessionn */ int (*sess_setup)(const unsigned int, struct cifs_ses *, const struct nls_table *); @@ -515,6 +519,63 @@ get_next_mid(struct TCP_Server_Info *server) return server->ops->get_next_mid(server); } +/* + * When the server supports very large reads and writes via POSIX extensions, + * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not + * including the RFC1001 length. + * + * Note that this might make for "interesting" allocation problems during + * writeback however as we have to allocate an array of pointers for the + * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. + * + * For reads, there is a similar problem as we need to allocate an array + * of kvecs to handle the receive, though that should only need to be done + * once. + */ +#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) + +/* + * When the server doesn't allow large posix writes, only allow a rsize/wsize + * of 2^17-1 minus the size of the call header. That allows for a read or + * write up to the maximum size described by RFC1002. + */ +#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) + +/* + * The default wsize is 1M. find_get_pages seems to return a maximum of 256 + * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill + * a single wsize request with a single call. + */ +#define CIFS_DEFAULT_IOSIZE (1024 * 1024) + +/* + * Windows only supports a max of 60kb reads and 65535 byte writes. Default to + * those values when posix extensions aren't in force. In actuality here, we + * use 65536 to allow for a write that is a multiple of 4k. Most servers seem + * to be ok with the extra byte even though Windows doesn't send writes that + * are that large. + * + * Citation: + * + * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx + */ +#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) +#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) + +/* + * On hosts with high memory, we can't currently support wsize/rsize that are + * larger than we can kmap at once. Cap the rsize/wsize at + * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request + * larger than that anyway. + */ +#ifdef CONFIG_HIGHMEM +#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) +#else /* CONFIG_HIGHMEM */ +#define CIFS_KMAP_SIZE_LIMIT (1<<24) +#endif /* CONFIG_HIGHMEM */ + /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6df6fa14cba8..c31b30b572e0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3261,146 +3261,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, "mount option supported"); } -/* - * When the server supports very large reads and writes via POSIX extensions, - * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not - * including the RFC1001 length. - * - * Note that this might make for "interesting" allocation problems during - * writeback however as we have to allocate an array of pointers for the - * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. - * - * For reads, there is a similar problem as we need to allocate an array - * of kvecs to handle the receive, though that should only need to be done - * once. - */ -#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) -#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) - -/* - * When the server doesn't allow large posix writes, only allow a rsize/wsize - * of 2^17-1 minus the size of the call header. That allows for a read or - * write up to the maximum size described by RFC1002. - */ -#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) -#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) - -/* - * The default wsize is 1M. find_get_pages seems to return a maximum of 256 - * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill - * a single wsize request with a single call. - */ -#define CIFS_DEFAULT_IOSIZE (1024 * 1024) - -/* - * Windows only supports a max of 60kb reads and 65535 byte writes. Default to - * those values when posix extensions aren't in force. In actuality here, we - * use 65536 to allow for a write that is a multiple of 4k. Most servers seem - * to be ok with the extra byte even though Windows doesn't send writes that - * are that large. - * - * Citation: - * - * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx - */ -#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) -#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) - -/* - * On hosts with high memory, we can't currently support wsize/rsize that are - * larger than we can kmap at once. Cap the rsize/wsize at - * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request - * larger than that anyway. - */ -#ifdef CONFIG_HIGHMEM -#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) -#else /* CONFIG_HIGHMEM */ -#define CIFS_KMAP_SIZE_LIMIT (1<<24) -#endif /* CONFIG_HIGHMEM */ - -static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) -{ - __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - struct TCP_Server_Info *server = tcon->ses->server; - unsigned int wsize; - - /* start with specified wsize, or default */ - if (pvolume_info->wsize) - wsize = pvolume_info->wsize; - else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) - wsize = CIFS_DEFAULT_IOSIZE; - else - wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; - - /* can server support 24-bit write sizes? (via UNIX extensions) */ - if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) - wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); - - /* - * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? - * Limit it to max buffer offered by the server, minus the size of the - * WRITEX header, not including the 4 byte RFC1001 length. - */ - if (!(server->capabilities & CAP_LARGE_WRITE_X) || - (!(server->capabilities & CAP_UNIX) && - (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) - wsize = min_t(unsigned int, wsize, - server->maxBuf - sizeof(WRITE_REQ) + 4); - - /* limit to the amount that we can kmap at once */ - wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); - - /* hard limit of CIFS_MAX_WSIZE */ - wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); - - return wsize; -} - -static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) -{ - __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - struct TCP_Server_Info *server = tcon->ses->server; - unsigned int rsize, defsize; - - /* - * Set default value... - * - * HACK alert! Ancient servers have very small buffers. Even though - * MS-CIFS indicates that servers are only limited by the client's - * bufsize for reads, testing against win98se shows that it throws - * INVALID_PARAMETER errors if you try to request too large a read. - * OS/2 just sends back short reads. - * - * If the server doesn't advertise CAP_LARGE_READ_X, then assume that - * it can't handle a read request larger than its MaxBufferSize either. - */ - if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) - defsize = CIFS_DEFAULT_IOSIZE; - else if (server->capabilities & CAP_LARGE_READ_X) - defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; - else - defsize = server->maxBuf - sizeof(READ_RSP); - - rsize = pvolume_info->rsize ? pvolume_info->rsize : defsize; - - /* - * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to - * the client's MaxBufferSize. - */ - if (!(server->capabilities & CAP_LARGE_READ_X)) - rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); - - /* limit to the amount that we can kmap at once */ - rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); - - /* hard limit of CIFS_MAX_RSIZE */ - rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); - - return rsize; -} - static void cleanup_volume_info_contents(struct smb_vol *volume_info) { @@ -3651,8 +3511,8 @@ try_mount_again: if (!tcon->ipc && server->ops->qfs_tcon) server->ops->qfs_tcon(xid, tcon); - cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); - cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); + cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info); + cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info); /* tune readahead according to rsize */ cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index df20dd9e64ca..7e8a2bdd69c8 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" @@ -410,6 +411,89 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses) return rc; } +static unsigned int +cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int wsize; + + /* start with specified wsize, or default */ + if (volume_info->wsize) + wsize = volume_info->wsize; + else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) + wsize = CIFS_DEFAULT_IOSIZE; + else + wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; + + /* can server support 24-bit write sizes? (via UNIX extensions) */ + if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) + wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); + + /* + * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? + * Limit it to max buffer offered by the server, minus the size of the + * WRITEX header, not including the 4 byte RFC1001 length. + */ + if (!(server->capabilities & CAP_LARGE_WRITE_X) || + (!(server->capabilities & CAP_UNIX) && + (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) + wsize = min_t(unsigned int, wsize, + server->maxBuf - sizeof(WRITE_REQ) + 4); + + /* limit to the amount that we can kmap at once */ + wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); + + /* hard limit of CIFS_MAX_WSIZE */ + wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); + + return wsize; +} + +static unsigned int +cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int rsize, defsize; + + /* + * Set default value... + * + * HACK alert! Ancient servers have very small buffers. Even though + * MS-CIFS indicates that servers are only limited by the client's + * bufsize for reads, testing against win98se shows that it throws + * INVALID_PARAMETER errors if you try to request too large a read. + * OS/2 just sends back short reads. + * + * If the server doesn't advertise CAP_LARGE_READ_X, then assume that + * it can't handle a read request larger than its MaxBufferSize either. + */ + if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) + defsize = CIFS_DEFAULT_IOSIZE; + else if (server->capabilities & CAP_LARGE_READ_X) + defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; + else + defsize = server->maxBuf - sizeof(READ_RSP); + + rsize = volume_info->rsize ? volume_info->rsize : defsize; + + /* + * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to + * the client's MaxBufferSize. + */ + if (!(server->capabilities & CAP_LARGE_READ_X)) + rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + + /* limit to the amount that we can kmap at once */ + rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); + + /* hard limit of CIFS_MAX_RSIZE */ + rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); + + return rsize; +} + static void cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) { @@ -678,6 +762,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, + .negotiate_wsize = cifs_negotiate_wsize, + .negotiate_rsize = cifs_negotiate_rsize, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, -- cgit v1.2.3 From 3a3bab509f3f0e7295caab24e9102ce303edb50b Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:28 -0700 Subject: CIFS: Add SMB2 r/wsize negotiating Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d81e1daeb8f0..20afb756e97a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include "cifsglob.h" #include "smb2pdu.h" #include "smb2proto.h" @@ -157,6 +158,48 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) return rc; } +static unsigned int +smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int wsize; + + /* start with specified wsize, or default */ + wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; + wsize = min_t(unsigned int, wsize, server->max_write); + /* + * limit write size to 2 ** 16, because we don't support multicredit + * requests now. + */ + wsize = min_t(unsigned int, wsize, 2 << 15); + + /* limit to the amount that we can kmap at once */ + wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); + + return wsize; +} + +static unsigned int +smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int rsize; + + /* start with specified rsize, or default */ + rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; + rsize = min_t(unsigned int, rsize, server->max_read); + /* + * limit write size to 2 ** 16, because we don't support multicredit + * requests now. + */ + rsize = min_t(unsigned int, rsize, 2 << 15); + + /* limit to the amount that we can kmap at once */ + rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); + + return rsize; +} + static int smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path) @@ -352,6 +395,8 @@ struct smb_version_operations smb21_operations = { .print_stats = smb2_print_stats, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, + .negotiate_wsize = smb2_negotiate_wsize, + .negotiate_rsize = smb2_negotiate_rsize, .sess_setup = SMB2_sess_setup, .logoff = SMB2_logoff, .tree_connect = SMB2_tcon, -- cgit v1.2.3 From fc9c59662e0cd37577556d0de865268baeb9b293 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:28 -0700 Subject: CIFS: Move async read to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/file.c | 8 +++++++- fs/cifs/smb1ops.c | 1 + fs/cifs/smb2maperror.c | 3 ++- 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e5cb1941e251..fcf81c05635f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -172,6 +172,7 @@ struct dfs_info3_param; struct cifs_fattr; struct smb_vol; struct cifs_fid; +struct cifs_readdata; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -280,6 +281,8 @@ struct smb_version_operations { int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); + /* async read from the server */ + int (*async_readv)(struct cifs_readdata *); }; struct smb_version_values { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index aa1dccf0df9e..ec7c2e6bcbdf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2488,6 +2488,9 @@ static int cifs_retry_async_readv(struct cifs_readdata *rdata) { int rc; + struct TCP_Server_Info *server; + + server = tlink_tcon(rdata->cfile->tlink)->ses->server; do { if (rdata->cfile->invalidHandle) { @@ -2495,7 +2498,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata) if (rc != 0) continue; } - rc = cifs_async_readv(rdata); + rc = server->ops->async_readv(rdata); } while (rc == -EAGAIN); return rc; @@ -2647,6 +2650,9 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); + if (!tcon->ses->server->ops->async_readv) + return -ENOSYS; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 7e8a2bdd69c8..e2dbd22cb136 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -785,6 +785,7 @@ struct smb_version_operations smb1_operations = { .set_fid = cifs_set_fid, .close = cifs_close_file, .flush = cifs_flush_file, + .async_readv = cifs_async_readv, }; struct smb_version_values smb1_values = { diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index be41478acc05..eaf5466d4041 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -2455,7 +2455,8 @@ map_smb2_to_linux_error(char *buf, bool log_err) return 0; /* mask facility */ - if (log_err && (smb2err != (STATUS_MORE_PROCESSING_REQUIRED))) + if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) && + (smb2err != STATUS_END_OF_FILE)) smb2_print_status(smb2err); else if (cifsFYI & CIFS_RC) smb2_print_status(smb2err); -- cgit v1.2.3 From 09a4707e7638247302c6d798061aed117141fb74 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:29 -0700 Subject: CIFS: Add SMB2 support for cifs_iovec_read Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 25 ++++++++++ fs/cifs/cifsproto.h | 20 +------- fs/cifs/cifssmb.c | 2 +- fs/cifs/file.c | 4 ++ fs/cifs/smb2glob.h | 6 +++ fs/cifs/smb2misc.c | 3 ++ fs/cifs/smb2ops.c | 19 ++++++++ fs/cifs/smb2pdu.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 28 +++++++++++ fs/cifs/smb2proto.h | 1 + 10 files changed, 225 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index fcf81c05635f..93dd582bb8d1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -857,12 +857,37 @@ struct cifsFileInfo { struct cifs_io_parms { __u16 netfid; +#ifdef CONFIG_CIFS_SMB2 + __u64 persistent_fid; /* persist file id for smb2 */ + __u64 volatile_fid; /* volatile file id for smb2 */ +#endif __u32 pid; __u64 offset; unsigned int length; struct cifs_tcon *tcon; }; +struct cifs_readdata; + +/* asynchronous read support */ +struct cifs_readdata { + struct kref refcount; + struct list_head list; + struct completion done; + struct cifsFileInfo *cfile; + struct address_space *mapping; + __u64 offset; + unsigned int bytes; + pid_t pid; + int result; + struct list_head pages; + struct work_struct work; + int (*marshal_iov) (struct cifs_readdata *rdata, + unsigned int remaining); + unsigned int nr_iov; + struct kvec iov[1]; +}; + /* * Take a reference on the file private data. Must be called with * cifs_file_list_lock held. diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 2c6ad78a16cc..6656eb5dbf70 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -464,27 +464,9 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); -/* asynchronous read support */ -struct cifs_readdata { - struct kref refcount; - struct list_head list; - struct completion done; - struct cifsFileInfo *cfile; - struct address_space *mapping; - __u64 offset; - unsigned int bytes; - pid_t pid; - int result; - struct list_head pages; - struct work_struct work; - int (*marshal_iov) (struct cifs_readdata *rdata, - unsigned int remaining); - unsigned int nr_iov; - struct kvec iov[1]; -}; - void cifs_readdata_release(struct kref *refcount); int cifs_async_readv(struct cifs_readdata *rdata); +int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid); /* asynchronous write support */ struct cifs_writedata { diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4c48b9c60b26..8a07f218266f 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1440,7 +1440,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) return 0; } -static int +int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { int length, len; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ec7c2e6bcbdf..29ac8ee46039 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2732,6 +2732,10 @@ restart_loop: cifs_stats_bytes_read(tcon, total_read); *poffset += total_read; + /* mask nodata case */ + if (rc == -ENODATA) + rc = 0; + return total_read ? total_read : rc; } diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 33c1d89090c0..11505d73ff32 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -41,4 +41,10 @@ #define SMB2_OP_RENAME 6 #define SMB2_OP_DELETE 7 +/* Used when constructing chained read requests. */ +#define CHAINED_REQUEST 1 +#define START_OF_CHAIN 2 +#define END_OF_CHAIN 4 +#define RELATED_REQUEST 8 + #endif /* _SMB2_GLOB_H */ diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index e4d3b9964167..9275883c8530 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -244,6 +244,9 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength); break; case SMB2_READ: + *off = ((struct smb2_read_rsp *)hdr)->DataOffset; + *len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength); + break; case SMB2_QUERY_DIRECTORY: case SMB2_IOCTL: case SMB2_CHANGE_NOTIFY: diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 20afb756e97a..d9ca357d9809 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -379,6 +379,20 @@ smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_flush(xid, tcon, fid->persistent_fid, fid->volatile_fid); } +static unsigned int +smb2_read_data_offset(char *buf) +{ + struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; + return rsp->DataOffset; +} + +static unsigned int +smb2_read_data_length(char *buf) +{ + struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; + return le32_to_cpu(rsp->DataLength); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -388,6 +402,9 @@ struct smb_version_operations smb21_operations = { .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, .get_next_mid = smb2_get_next_mid, + .read_data_offset = smb2_read_data_offset, + .read_data_length = smb2_read_data_length, + .map_error = map_smb2_to_linux_error, .find_mid = smb2_find_mid, .check_message = smb2_check_message, .dump_detail = smb2_dump_detail, @@ -416,12 +433,14 @@ struct smb_version_operations smb21_operations = { .set_fid = smb2_set_fid, .close = smb2_close_file, .flush = smb2_flush_file, + .async_readv = smb2_async_readv, }; struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, .header_size = sizeof(struct smb2_hdr), .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, .lock_cmd = SMB2_LOCK, .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ff374063f4e2..e18671852d41 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include "smb2pdu.h" @@ -42,6 +43,7 @@ #include "cifs_debug.h" #include "ntlmssp.h" #include "smb2status.h" +#include "smb2glob.h" /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -1190,3 +1192,138 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, free_rsp_buf(resp_buftype, iov[0].iov_base); return rc; } + +/* + * To form a chain of read requests, any read requests after the first should + * have the end_of_chain boolean set to true. + */ +static int +smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms, + unsigned int remaining_bytes, int request_type) +{ + int rc = -EACCES; + struct smb2_read_req *req = NULL; + + rc = small_smb2_init(SMB2_READ, io_parms->tcon, (void **) &req); + if (rc) + return rc; + if (io_parms->tcon->ses->server == NULL) + return -ECONNABORTED; + + req->hdr.ProcessId = cpu_to_le32(io_parms->pid); + + req->PersistentFileId = io_parms->persistent_fid; + req->VolatileFileId = io_parms->volatile_fid; + req->ReadChannelInfoOffset = 0; /* reserved */ + req->ReadChannelInfoLength = 0; /* reserved */ + req->Channel = 0; /* reserved */ + req->MinimumCount = 0; + req->Length = cpu_to_le32(io_parms->length); + req->Offset = cpu_to_le64(io_parms->offset); + + if (request_type & CHAINED_REQUEST) { + if (!(request_type & END_OF_CHAIN)) { + /* 4 for rfc1002 length field */ + req->hdr.NextCommand = + cpu_to_le32(get_rfc1002_length(req) + 4); + } else /* END_OF_CHAIN */ + req->hdr.NextCommand = 0; + if (request_type & RELATED_REQUEST) { + req->hdr.Flags |= SMB2_FLAGS_RELATED_OPERATIONS; + /* + * Related requests use info from previous read request + * in chain. + */ + req->hdr.SessionId = 0xFFFFFFFF; + req->hdr.TreeId = 0xFFFFFFFF; + req->PersistentFileId = 0xFFFFFFFF; + req->VolatileFileId = 0xFFFFFFFF; + } + } + if (remaining_bytes > io_parms->length) + req->RemainingBytes = cpu_to_le32(remaining_bytes); + else + req->RemainingBytes = 0; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + return rc; +} + +static void +smb2_readv_callback(struct mid_q_entry *mid) +{ + struct cifs_readdata *rdata = mid->callback_data; + struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base; + unsigned int credits_received = 1; + + cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, + mid->mid, mid->mid_state, rdata->result, rdata->bytes); + + switch (mid->mid_state) { + case MID_RESPONSE_RECEIVED: + credits_received = le16_to_cpu(buf->CreditRequest); + /* result already set, check signature */ + /* if (server->sec_mode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (smb2_verify_signature(mid->resp_buf, server)) + cERROR(1, "Unexpected SMB signature"); */ + /* FIXME: should this be counted toward the initiating task? */ + task_io_account_read(rdata->bytes); + cifs_stats_bytes_read(tcon, rdata->bytes); + break; + case MID_REQUEST_SUBMITTED: + case MID_RETRY_NEEDED: + rdata->result = -EAGAIN; + break; + default: + if (rdata->result != -ENODATA) + rdata->result = -EIO; + } + + if (rdata->result) + cifs_stats_fail_inc(tcon, SMB2_READ_HE); + + queue_work(cifsiod_wq, &rdata->work); + DeleteMidQEntry(mid); + add_credits(server, credits_received, 0); +} + +/* smb2_async_readv - send an async write, and set up mid to handle result */ +int +smb2_async_readv(struct cifs_readdata *rdata) +{ + int rc; + struct smb2_hdr *buf; + struct cifs_io_parms io_parms; + + cFYI(1, "%s: offset=%llu bytes=%u", __func__, + rdata->offset, rdata->bytes); + + io_parms.tcon = tlink_tcon(rdata->cfile->tlink); + io_parms.offset = rdata->offset; + io_parms.length = rdata->bytes; + io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; + io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; + io_parms.pid = rdata->pid; + rc = smb2_new_read_req(&rdata->iov[0], &io_parms, 0, 0); + if (rc) + return rc; + + buf = (struct smb2_hdr *)rdata->iov[0].iov_base; + /* 4 for rfc1002 length field */ + rdata->iov[0].iov_len = get_rfc1002_length(rdata->iov[0].iov_base) + 4; + + kref_get(&rdata->refcount); + rc = cifs_call_async(io_parms.tcon->ses->server, rdata->iov, 1, + cifs_readv_receive, smb2_readv_callback, + rdata, 0); + if (rc) + kref_put(&rdata->refcount, cifs_readdata_release); + + cifs_small_buf_release(buf); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f5bf63f66971..4abb58106809 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -468,6 +468,34 @@ struct smb2_flush_rsp { __le16 Reserved; } __packed; +struct smb2_read_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 49 */ + __u8 Padding; /* offset from start of SMB2 header to place read */ + __u8 Reserved; + __le32 Length; + __le64 Offset; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le32 MinimumCount; + __le32 Channel; /* Reserved MBZ */ + __le32 RemainingBytes; + __le16 ReadChannelInfoOffset; /* Reserved MBZ */ + __le16 ReadChannelInfoLength; /* Reserved MBZ */ + __u8 Buffer[1]; +} __packed; + +struct smb2_read_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 17 */ + __u8 DataOffset; + __u8 Reserved; + __le32 DataLength; + __le32 DataRemaining; + __u32 Reserved2; + __u8 Buffer[1]; +} __packed; + struct smb2_echo_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 51e6cd185c79..f442e4699974 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -97,6 +97,7 @@ extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid); +extern int smb2_async_readv(struct cifs_readdata *rdata); extern int SMB2_echo(struct TCP_Server_Info *server); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From c9de5c80d536e556568ccd45b9599870d4993b7d Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:29 -0700 Subject: CIFS: Move async write to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifssmb.c | 4 +++- fs/cifs/file.c | 13 +++++++++++-- fs/cifs/smb1ops.c | 1 + 4 files changed, 18 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 93dd582bb8d1..aef167470654 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -173,6 +173,7 @@ struct cifs_fattr; struct smb_vol; struct cifs_fid; struct cifs_readdata; +struct cifs_writedata; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -283,6 +284,8 @@ struct smb_version_operations { int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* async read from the server */ int (*async_readv)(struct cifs_readdata *); + /* async write to the server */ + int (*async_writev)(struct cifs_writedata *); }; struct smb_version_values { diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 8a07f218266f..2a9b27387708 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1926,6 +1926,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) { int i, rc; struct inode *inode = wdata->cfile->dentry->d_inode; + struct TCP_Server_Info *server; for (i = 0; i < wdata->nr_pages; i++) { lock_page(wdata->pages[i]); @@ -1933,7 +1934,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata) } do { - rc = cifs_async_writev(wdata); + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + rc = server->ops->async_writev(wdata); } while (rc == -EAGAIN); for (i = 0; i < wdata->nr_pages; i++) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 29ac8ee46039..703c1648b068 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1754,6 +1754,7 @@ static int cifs_writepages(struct address_space *mapping, bool done = false, scanned = false, range_whole = false; pgoff_t end, index; struct cifs_writedata *wdata; + struct TCP_Server_Info *server; struct page *page; int rc = 0; @@ -1904,7 +1905,8 @@ retry: break; } wdata->pid = wdata->cfile->pid; - rc = cifs_async_writev(wdata); + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + rc = server->ops->async_writev(wdata); } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); for (i = 0; i < nr_pages; ++i) @@ -2235,6 +2237,9 @@ static int cifs_uncached_retry_writev(struct cifs_writedata *wdata) { int rc; + struct TCP_Server_Info *server; + + server = tlink_tcon(wdata->cfile->tlink)->ses->server; do { if (wdata->cfile->invalidHandle) { @@ -2242,7 +2247,7 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata) if (rc != 0) continue; } - rc = cifs_async_writev(wdata); + rc = server->ops->async_writev(wdata); } while (rc == -EAGAIN); return rc; @@ -2277,6 +2282,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_writev) + return -ENOSYS; + offset = *poffset; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index e2dbd22cb136..50c3697af5aa 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -786,6 +786,7 @@ struct smb_version_operations smb1_operations = { .close = cifs_close_file, .flush = cifs_flush_file, .async_readv = cifs_async_readv, + .async_writev = cifs_async_writev, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 33319141252fd14b58cf13685156c23dcaac2527 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:29 -0700 Subject: CIFS: Add SMB2 support for cifs_iovec_write Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 4 ++ fs/cifs/cifsglob.h | 47 ++++++++++++++++++++ fs/cifs/cifsproto.h | 18 -------- fs/cifs/cifssmb.c | 26 ----------- fs/cifs/smb2ops.c | 1 + fs/cifs/smb2pdu.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 30 +++++++++++++ fs/cifs/smb2proto.h | 1 + 8 files changed, 206 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index db8a404a51dd..2829f374dbf7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -89,6 +89,10 @@ extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +#ifdef CONFIG_HIGHMEM +DEFINE_MUTEX(cifs_kmap_mutex); +#endif + static int cifs_read_super(struct super_block *sb) { diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index aef167470654..330f6259bb6d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -582,6 +582,33 @@ get_next_mid(struct TCP_Server_Info *server) #define CIFS_KMAP_SIZE_LIMIT (1<<24) #endif /* CONFIG_HIGHMEM */ +#ifdef CONFIG_HIGHMEM +/* + * On arches that have high memory, kmap address space is limited. By + * serializing the kmap operations on those arches, we ensure that we don't + * end up with a bunch of threads in writeback with partially mapped page + * arrays, stuck waiting for kmap to come back. That situation prevents + * progress and can deadlock. + */ + +extern struct mutex cifs_kmap_mutex; + +static inline void +cifs_kmap_lock(void) +{ + mutex_lock(&cifs_kmap_mutex); +} + +static inline void +cifs_kmap_unlock(void) +{ + mutex_unlock(&cifs_kmap_mutex); +} +#else /* !CONFIG_HIGHMEM */ +#define cifs_kmap_lock() do { ; } while (0) +#define cifs_kmap_unlock() do { ; } while (0) +#endif /* CONFIG_HIGHMEM */ + /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. @@ -891,6 +918,26 @@ struct cifs_readdata { struct kvec iov[1]; }; +struct cifs_writedata; + +/* asynchronous write support */ +struct cifs_writedata { + struct kref refcount; + struct list_head list; + struct completion done; + enum writeback_sync_modes sync_mode; + struct work_struct work; + struct cifsFileInfo *cfile; + __u64 offset; + pid_t pid; + unsigned int bytes; + int result; + void (*marshal_iov) (struct kvec *iov, + struct cifs_writedata *wdata); + unsigned int nr_pages; + struct page *pages[1]; +}; + /* * Take a reference on the file private data. Must be called with * cifs_file_list_lock held. diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 6656eb5dbf70..8b320c7b582c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -468,24 +468,6 @@ void cifs_readdata_release(struct kref *refcount); int cifs_async_readv(struct cifs_readdata *rdata); int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid); -/* asynchronous write support */ -struct cifs_writedata { - struct kref refcount; - struct list_head list; - struct completion done; - enum writeback_sync_modes sync_mode; - struct work_struct work; - struct cifsFileInfo *cfile; - __u64 offset; - pid_t pid; - unsigned int bytes; - int result; - void (*marshal_iov) (struct kvec *iov, - struct cifs_writedata *wdata); - unsigned int nr_pages; - struct page *pages[1]; -}; - int cifs_async_writev(struct cifs_writedata *wdata); void cifs_writev_complete(struct work_struct *work); struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2a9b27387708..f27b13ea08b8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,32 +86,6 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ -#ifdef CONFIG_HIGHMEM -/* - * On arches that have high memory, kmap address space is limited. By - * serializing the kmap operations on those arches, we ensure that we don't - * end up with a bunch of threads in writeback with partially mapped page - * arrays, stuck waiting for kmap to come back. That situation prevents - * progress and can deadlock. - */ -static DEFINE_MUTEX(cifs_kmap_mutex); - -static inline void -cifs_kmap_lock(void) -{ - mutex_lock(&cifs_kmap_mutex); -} - -static inline void -cifs_kmap_unlock(void) -{ - mutex_unlock(&cifs_kmap_mutex); -} -#else /* !CONFIG_HIGHMEM */ -#define cifs_kmap_lock() do { ; } while(0) -#define cifs_kmap_unlock() do { ; } while(0) -#endif /* CONFIG_HIGHMEM */ - /* * Mark as invalid, all open files on tree connections since they * were closed when session to server was lost. diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d9ca357d9809..da31235023fb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -434,6 +434,7 @@ struct smb_version_operations smb21_operations = { .close = smb2_close_file, .flush = smb2_flush_file, .async_readv = smb2_async_readv, + .async_writev = smb2_async_writev, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e18671852d41..cb6acc7b1ca8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "smb2pdu.h" #include "cifsglob.h" @@ -1327,3 +1328,125 @@ smb2_async_readv(struct cifs_readdata *rdata) cifs_small_buf_release(buf); return rc; } + +/* + * Check the mid_state and signature on received buffer (if any), and queue the + * workqueue completion task. + */ +static void +smb2_writev_callback(struct mid_q_entry *mid) +{ + struct cifs_writedata *wdata = mid->callback_data; + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); + unsigned int written; + struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; + unsigned int credits_received = 1; + + switch (mid->mid_state) { + case MID_RESPONSE_RECEIVED: + credits_received = le16_to_cpu(rsp->hdr.CreditRequest); + wdata->result = smb2_check_receive(mid, tcon->ses->server, 0); + if (wdata->result != 0) + break; + + written = le32_to_cpu(rsp->DataLength); + /* + * Mask off high 16 bits when bytes written as returned + * by the server is greater than bytes requested by the + * client. OS/2 servers are known to set incorrect + * CountHigh values. + */ + if (written > wdata->bytes) + written &= 0xFFFF; + + if (written < wdata->bytes) + wdata->result = -ENOSPC; + else + wdata->bytes = written; + break; + case MID_REQUEST_SUBMITTED: + case MID_RETRY_NEEDED: + wdata->result = -EAGAIN; + break; + default: + wdata->result = -EIO; + break; + } + + if (wdata->result) + cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); + + queue_work(cifsiod_wq, &wdata->work); + DeleteMidQEntry(mid); + add_credits(tcon->ses->server, credits_received, 0); +} + +/* smb2_async_writev - send an async write, and set up mid to handle result */ +int +smb2_async_writev(struct cifs_writedata *wdata) +{ + int i, rc = -EACCES; + struct smb2_write_req *req = NULL; + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); + struct kvec *iov = NULL; + + rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); + if (rc) + goto async_writev_out; + + /* 1 iov per page + 1 for header */ + iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS); + if (iov == NULL) { + rc = -ENOMEM; + goto async_writev_out; + } + + req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); + + req->PersistentFileId = wdata->cfile->fid.persistent_fid; + req->VolatileFileId = wdata->cfile->fid.volatile_fid; + req->WriteChannelInfoOffset = 0; + req->WriteChannelInfoLength = 0; + req->Channel = 0; + req->Offset = cpu_to_le64(wdata->offset); + /* 4 for rfc1002 length field */ + req->DataOffset = cpu_to_le16( + offsetof(struct smb2_write_req, Buffer) - 4); + req->RemainingBytes = 0; + + /* 4 for rfc1002 length field and 1 for Buffer */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + iov[0].iov_base = (char *)req; + + /* + * This function should marshal up the page array into the kvec + * array, reserving [0] for the header. It should kmap the pages + * and set the iov_len properly for each one. It may also set + * wdata->bytes too. + */ + cifs_kmap_lock(); + wdata->marshal_iov(iov, wdata); + cifs_kmap_unlock(); + + cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); + + req->Length = cpu_to_le32(wdata->bytes); + + inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); + + kref_get(&wdata->refcount); + rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, + NULL, smb2_writev_callback, wdata, 0); + + if (rc) + kref_put(&wdata->refcount, cifs_writedata_release); + + /* send is done, unmap pages */ + for (i = 0; i < wdata->nr_pages; i++) + kunmap(wdata->pages[i]); + +async_writev_out: + cifs_small_buf_release(req); + kfree(iov); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4abb58106809..21ec9ed280f9 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -496,6 +496,36 @@ struct smb2_read_rsp { __u8 Buffer[1]; } __packed; +/* For write request Flags field below the following flag is defined: */ +#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 + +struct smb2_write_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 49 */ + __le16 DataOffset; /* offset from start of SMB2 header to write data */ + __le32 Length; + __le64 Offset; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le32 Channel; /* Reserved MBZ */ + __le32 RemainingBytes; + __le16 WriteChannelInfoOffset; /* Reserved MBZ */ + __le16 WriteChannelInfoLength; /* Reserved MBZ */ + __le32 Flags; + __u8 Buffer[1]; +} __packed; + +struct smb2_write_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 17 */ + __u8 DataOffset; + __u8 Reserved; + __le32 DataLength; + __le32 DataRemaining; + __u32 Reserved2; + __u8 Buffer[1]; +} __packed; + struct smb2_echo_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index f442e4699974..ec983be0ba31 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -98,6 +98,7 @@ extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid); extern int smb2_async_readv(struct cifs_readdata *rdata); +extern int smb2_async_writev(struct cifs_writedata *wdata); extern int SMB2_echo(struct TCP_Server_Info *server); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From f9c6e234c3ca64b8d49336908df99948518d6261 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:29 -0700 Subject: CIFS: Move readpage code to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/file.c | 28 +++++++++++++++++----------- fs/cifs/smb1ops.c | 10 ++++++++++ 3 files changed, 32 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 330f6259bb6d..5b1751d81901 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -174,6 +174,7 @@ struct smb_vol; struct cifs_fid; struct cifs_readdata; struct cifs_writedata; +struct cifs_io_parms; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -286,6 +287,10 @@ struct smb_version_operations { int (*async_readv)(struct cifs_readdata *); /* async write to the server */ int (*async_writev)(struct cifs_writedata *); + /* sync read from the server */ + int (*sync_read)(const unsigned int, struct cifsFileInfo *, + struct cifs_io_parms *, unsigned int *, char **, + int *); }; struct smb_version_values { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 703c1648b068..fae03c52f314 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2782,8 +2782,8 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, return cifs_user_readv(iocb, iov, nr_segs, pos); } -static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, - loff_t *poffset) +static ssize_t +cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) { int rc = -EACCES; unsigned int bytes_read = 0; @@ -2792,8 +2792,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, unsigned int rsize; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; unsigned int xid; - char *current_offset; + char *cur_offset; struct cifsFileInfo *open_file; struct cifs_io_parms io_parms; int buf_type = CIFS_NO_BUFFER; @@ -2812,6 +2813,12 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, } open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); + server = tcon->ses->server; + + if (!server->ops->sync_read) { + free_xid(xid); + return -ENOSYS; + } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -2821,9 +2828,8 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if ((file->f_flags & O_ACCMODE) == O_WRONLY) cFYI(1, "attempting read on write only file instance"); - for (total_read = 0, current_offset = read_data; - read_size > total_read; - total_read += bytes_read, current_offset += bytes_read) { + for (total_read = 0, cur_offset = read_data; read_size > total_read; + total_read += bytes_read, cur_offset += bytes_read) { current_read_size = min_t(uint, read_size - total_read, rsize); /* * For windows me and 9x we do not want to request more than it @@ -2841,13 +2847,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if (rc != 0) break; } - io_parms.netfid = open_file->fid.netfid; io_parms.pid = pid; io_parms.tcon = tcon; - io_parms.offset = *poffset; + io_parms.offset = *offset; io_parms.length = current_read_size; - rc = CIFSSMBRead(xid, &io_parms, &bytes_read, - ¤t_offset, &buf_type); + rc = server->ops->sync_read(xid, open_file, &io_parms, + &bytes_read, &cur_offset, + &buf_type); } if (rc || (bytes_read == 0)) { if (total_read) { @@ -2858,7 +2864,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, } } else { cifs_stats_bytes_read(tcon, total_read); - *poffset += bytes_read; + *offset += bytes_read; } } free_xid(xid); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 50c3697af5aa..cea958ee8b7a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -739,6 +739,15 @@ cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon, return CIFSSMBFlush(xid, tcon, fid->netfid); } +static int +cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *bytes_read, + char **buf, int *buf_type) +{ + parms->netfid = cfile->fid.netfid; + return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -787,6 +796,7 @@ struct smb_version_operations smb1_operations = { .flush = cifs_flush_file, .async_readv = cifs_async_readv, .async_writev = cifs_async_writev, + .sync_read = cifs_sync_read, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From d8e050398d23ef7c019c96200b80d73f4e5cec0c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:30 -0700 Subject: CIFS: Add readpage support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 12 ++++++++++++ fs/cifs/smb2pdu.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2proto.h | 2 ++ 3 files changed, 65 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index da31235023fb..fb289d2abae7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -393,6 +393,17 @@ smb2_read_data_length(char *buf) return le32_to_cpu(rsp->DataLength); } + +static int +smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *bytes_read, + char **buf, int *buf_type) +{ + parms->persistent_fid = cfile->fid.persistent_fid; + parms->volatile_fid = cfile->fid.volatile_fid; + return SMB2_read(xid, parms, bytes_read, buf, buf_type); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -435,6 +446,7 @@ struct smb_version_operations smb21_operations = { .flush = smb2_flush_file, .async_readv = smb2_async_readv, .async_writev = smb2_async_writev, + .sync_read = smb2_sync_read, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index cb6acc7b1ca8..23c569386f32 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1329,6 +1329,57 @@ smb2_async_readv(struct cifs_readdata *rdata) return rc; } +int +SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, char **buf, int *buf_type) +{ + int resp_buftype, rc = -EACCES; + struct smb2_read_rsp *rsp = NULL; + struct kvec iov[1]; + + *nbytes = 0; + rc = smb2_new_read_req(iov, io_parms, 0, 0); + if (rc) + return rc; + + rc = SendReceive2(xid, io_parms->tcon->ses, iov, 1, + &resp_buftype, CIFS_LOG_ERROR); + + rsp = (struct smb2_read_rsp *)iov[0].iov_base; + + if (rsp->hdr.Status == STATUS_END_OF_FILE) { + free_rsp_buf(resp_buftype, iov[0].iov_base); + return 0; + } + + if (rc) { + cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); + cERROR(1, "Send error in read = %d", rc); + } else { + *nbytes = le32_to_cpu(rsp->DataLength); + if ((*nbytes > CIFS_MAX_MSGSIZE) || + (*nbytes > io_parms->length)) { + cFYI(1, "bad length %d for count %d", *nbytes, + io_parms->length); + rc = -EIO; + *nbytes = 0; + } + } + + if (*buf) { + memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset, + *nbytes); + free_rsp_buf(resp_buftype, iov[0].iov_base); + } else if (resp_buftype != CIFS_NO_BUFFER) { + *buf = iov[0].iov_base; + if (resp_buftype == CIFS_SMALL_BUFFER) + *buf_type = CIFS_SMALL_BUFFER; + else if (resp_buftype == CIFS_LARGE_BUFFER) + *buf_type = CIFS_LARGE_BUFFER; + } + return rc; +} + /* * Check the mid_state and signature on received buffer (if any), and queue the * workqueue completion task. diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index ec983be0ba31..97e62f3df410 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -98,6 +98,8 @@ extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid); extern int smb2_async_readv(struct cifs_readdata *rdata); +extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, char **buf, int *buf_type); extern int smb2_async_writev(struct cifs_writedata *wdata); extern int SMB2_echo(struct TCP_Server_Info *server); -- cgit v1.2.3 From ba9ad7257ae50b8aa72a3f44da839830e065363c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:30 -0700 Subject: CIFS: Move writepage to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 4 ++++ fs/cifs/cifsproto.h | 3 +-- fs/cifs/cifssmb.c | 6 ++---- fs/cifs/file.c | 36 ++++++++++++++++++++---------------- fs/cifs/smb1ops.c | 11 +++++++++++ 5 files changed, 38 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5b1751d81901..48c7c83a7a99 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -291,6 +291,10 @@ struct smb_version_operations { int (*sync_read)(const unsigned int, struct cifsFileInfo *, struct cifs_io_parms *, unsigned int *, char **, int *); + /* sync write to the server */ + int (*sync_write)(const unsigned int, struct cifsFileInfo *, + struct cifs_io_parms *, unsigned int *, struct kvec *, + unsigned long); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 8b320c7b582c..f20d0c8ee7ce 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -369,8 +369,7 @@ extern int CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, const char *buf, const char __user *ubuf, const int long_op); extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, - unsigned int *nbytes, struct kvec *iov, const int nvec, - const int long_op); + unsigned int *nbytes, struct kvec *iov, const int nvec); extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, const char *search_name, __u64 *inode_number, const struct nls_table *nls_codepage, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f27b13ea08b8..733119dad493 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2123,8 +2123,7 @@ async_writev_out: int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, - unsigned int *nbytes, struct kvec *iov, int n_vec, - const int long_op) + unsigned int *nbytes, struct kvec *iov, int n_vec) { int rc = -EACCES; WRITE_REQ *pSMB = NULL; @@ -2195,8 +2194,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, iov[0].iov_len = smb_hdr_len + 8; - rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, - long_op); + rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); if (rc) { cFYI(1, "Send error Write2 = %d", rc); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index fae03c52f314..39fff77e38d4 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1477,15 +1477,16 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, cifsi->server_eof = end_of_write; } -static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, - const char *write_data, size_t write_size, - loff_t *poffset) +static ssize_t +cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, + size_t write_size, loff_t *offset) { int rc = 0; unsigned int bytes_written = 0; unsigned int total_written; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; unsigned int xid; struct dentry *dentry = open_file->dentry; struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); @@ -1494,9 +1495,13 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, cifs_sb = CIFS_SB(dentry->d_sb); cFYI(1, "write %zd bytes to offset %lld of %s", write_size, - *poffset, dentry->d_name.name); + *offset, dentry->d_name.name); + + tcon = tlink_tcon(open_file->tlink); + server = tcon->ses->server; - pTcon = tlink_tcon(open_file->tlink); + if (!server->ops->sync_write) + return -ENOSYS; xid = get_xid(); @@ -1522,13 +1527,12 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, /* iov[0] is reserved for smb header */ iov[1].iov_base = (char *)write_data + total_written; iov[1].iov_len = len; - io_parms.netfid = open_file->fid.netfid; io_parms.pid = pid; - io_parms.tcon = pTcon; - io_parms.offset = *poffset; + io_parms.tcon = tcon; + io_parms.offset = *offset; io_parms.length = len; - rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov, - 1, 0); + rc = server->ops->sync_write(xid, open_file, &io_parms, + &bytes_written, iov, 1); } if (rc || (bytes_written == 0)) { if (total_written) @@ -1539,18 +1543,18 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, } } else { spin_lock(&dentry->d_inode->i_lock); - cifs_update_eof(cifsi, *poffset, bytes_written); + cifs_update_eof(cifsi, *offset, bytes_written); spin_unlock(&dentry->d_inode->i_lock); - *poffset += bytes_written; + *offset += bytes_written; } } - cifs_stats_bytes_written(pTcon, total_written); + cifs_stats_bytes_written(tcon, total_written); if (total_written > 0) { spin_lock(&dentry->d_inode->i_lock); - if (*poffset > dentry->d_inode->i_size) - i_size_write(dentry->d_inode, *poffset); + if (*offset > dentry->d_inode->i_size) + i_size_write(dentry->d_inode, *offset); spin_unlock(&dentry->d_inode->i_lock); } mark_inode_dirty_sync(dentry->d_inode); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index cea958ee8b7a..aa55c2fbf793 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -748,6 +748,16 @@ cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type); } +static int +cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *written, + struct kvec *iov, unsigned long nr_segs) +{ + + parms->netfid = cfile->fid.netfid; + return CIFSSMBWrite2(xid, parms, written, iov, nr_segs); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -797,6 +807,7 @@ struct smb_version_operations smb1_operations = { .async_readv = cifs_async_readv, .async_writev = cifs_async_writev, .sync_read = cifs_sync_read, + .sync_write = cifs_sync_write, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 009d344398bb3e844b31eb9e6a7860748c6f6dd3 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:30 -0700 Subject: CIFS: Add writepage support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 12 +++++++++++ fs/cifs/smb2pdu.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2proto.h | 2 ++ 3 files changed, 75 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index fb289d2abae7..f9c3dbee9010 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -404,6 +404,17 @@ smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, return SMB2_read(xid, parms, bytes_read, buf, buf_type); } +static int +smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *written, + struct kvec *iov, unsigned long nr_segs) +{ + + parms->persistent_fid = cfile->fid.persistent_fid; + parms->volatile_fid = cfile->fid.volatile_fid; + return SMB2_write(xid, parms, written, iov, nr_segs); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -447,6 +458,7 @@ struct smb_version_operations smb21_operations = { .async_readv = smb2_async_readv, .async_writev = smb2_async_writev, .sync_read = smb2_sync_read, + .sync_write = smb2_sync_write, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 23c569386f32..00dc45a7881c 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1501,3 +1501,64 @@ async_writev_out: kfree(iov); return rc; } + +/* + * SMB2_write function gets iov pointer to kvec array with n_vec as a length. + * The length field from io_parms must be at least 1 and indicates a number of + * elements with data to write that begins with position 1 in iov array. All + * data length is specified by count. + */ +int +SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, struct kvec *iov, int n_vec) +{ + int rc = 0; + struct smb2_write_req *req = NULL; + struct smb2_write_rsp *rsp = NULL; + int resp_buftype; + *nbytes = 0; + + if (n_vec < 1) + return rc; + + rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req); + if (rc) + return rc; + + if (io_parms->tcon->ses->server == NULL) + return -ECONNABORTED; + + req->hdr.ProcessId = cpu_to_le32(io_parms->pid); + + req->PersistentFileId = io_parms->persistent_fid; + req->VolatileFileId = io_parms->volatile_fid; + req->WriteChannelInfoOffset = 0; + req->WriteChannelInfoLength = 0; + req->Channel = 0; + req->Length = cpu_to_le32(io_parms->length); + req->Offset = cpu_to_le64(io_parms->offset); + /* 4 for rfc1002 length field */ + req->DataOffset = cpu_to_le16( + offsetof(struct smb2_write_req, Buffer) - 4); + req->RemainingBytes = 0; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field and 1 for Buffer */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + + /* length of entire message including data to be written */ + inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */); + + rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1, + &resp_buftype, 0); + + if (rc) { + cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE); + cERROR(1, "Send error in write = %d", rc); + } else { + rsp = (struct smb2_write_rsp *)iov[0].iov_base; + *nbytes = le32_to_cpu(rsp->DataLength); + free_rsp_buf(resp_buftype, rsp); + } + return rc; +} diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 97e62f3df410..192d0c7d91eb 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -101,6 +101,8 @@ extern int smb2_async_readv(struct cifs_readdata *rdata); extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, char **buf, int *buf_type); extern int smb2_async_writev(struct cifs_writedata *wdata); +extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, struct kvec *iov, int n_vec); extern int SMB2_echo(struct TCP_Server_Info *server); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 3c1bf7e48e9e463b65b1b90da4500a93dd2b27a7 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:30 -0700 Subject: CIFS: Enable signing in SMB2 Use hmac-sha256 and rather than hmac-md5 that is used for CIFS/SMB. Signature field in SMB2 header is 16 bytes instead of 8 bytes. Automatically enable signing by client when requested by the server when signing ability is available to the client. Signed-off-by: Shirish Pargaonkar Signed-off-by: Sachin Prabhu Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Kconfig | 1 + fs/cifs/cifsencrypt.c | 30 ++++++++- fs/cifs/cifsglob.h | 2 + fs/cifs/cifsproto.h | 1 + fs/cifs/smb2glob.h | 4 ++ fs/cifs/smb2pdu.c | 52 +++++++++++++-- fs/cifs/smb2proto.h | 2 + fs/cifs/smb2transport.c | 165 +++++++++++++++++++++++++++++++++++++++++++++--- fs/cifs/transport.c | 24 +++---- 9 files changed, 253 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index a08306a8bec9..802930179c2b 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -9,6 +9,7 @@ config CIFS select CRYPTO_ARC4 select CRYPTO_ECB select CRYPTO_DES + select CRYPTO_SHA256 help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 6a0d741159f0..724738c1a560 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -686,12 +686,17 @@ calc_seckey(struct cifs_ses *ses) void cifs_crypto_shash_release(struct TCP_Server_Info *server) { + if (server->secmech.hmacsha256) + crypto_free_shash(server->secmech.hmacsha256); + if (server->secmech.md5) crypto_free_shash(server->secmech.md5); if (server->secmech.hmacmd5) crypto_free_shash(server->secmech.hmacmd5); + kfree(server->secmech.sdeschmacsha256); + kfree(server->secmech.sdeschmacmd5); kfree(server->secmech.sdescmd5); @@ -716,6 +721,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) goto crypto_allocate_md5_fail; } + server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); + if (IS_ERR(server->secmech.hmacsha256)) { + cERROR(1, "could not allocate crypto hmacsha256\n"); + rc = PTR_ERR(server->secmech.hmacsha256); + goto crypto_allocate_hmacsha256_fail; + } + size = sizeof(struct shash_desc) + crypto_shash_descsize(server->secmech.hmacmd5); server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); @@ -727,7 +739,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; server->secmech.sdeschmacmd5->shash.flags = 0x0; - size = sizeof(struct shash_desc) + crypto_shash_descsize(server->secmech.md5); server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); @@ -739,12 +750,29 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) server->secmech.sdescmd5->shash.tfm = server->secmech.md5; server->secmech.sdescmd5->shash.flags = 0x0; + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->secmech.hmacsha256); + server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); + if (!server->secmech.sdeschmacsha256) { + cERROR(1, "%s: Can't alloc hmacsha256\n", __func__); + rc = -ENOMEM; + goto crypto_allocate_hmacsha256_sdesc_fail; + } + server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; + server->secmech.sdeschmacsha256->shash.flags = 0x0; + return 0; +crypto_allocate_hmacsha256_sdesc_fail: + kfree(server->secmech.sdescmd5); + crypto_allocate_md5_sdesc_fail: kfree(server->secmech.sdeschmacmd5); crypto_allocate_hmacmd5_sdesc_fail: + crypto_free_shash(server->secmech.hmacsha256); + +crypto_allocate_hmacsha256_fail: crypto_free_shash(server->secmech.md5); crypto_allocate_md5_fail: diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 48c7c83a7a99..6217df707909 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -128,8 +128,10 @@ struct sdesc { struct cifs_secmech { struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ struct crypto_shash *md5; /* md5 hash function */ + struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ + struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ }; /* per smb session structure/fields */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f20d0c8ee7ce..541738fcaebc 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -65,6 +65,7 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata, extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); +extern void cifs_delete_mid(struct mid_q_entry *mid); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, unsigned int nvec, mid_receive_t *receive, diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 11505d73ff32..8635574ea8b6 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -47,4 +47,8 @@ #define END_OF_CHAIN 4 #define RELATED_REQUEST 8 +#define SMB2_SIGNATURE_SIZE (16) +#define SMB2_NTLMV2_SESSKEY_SIZE (16) +#define SMB2_HMACSHA256_SIZE (32) + #endif /* _SMB2_GLOB_H */ diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 00dc45a7881c..30c92c847fe8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -118,9 +118,9 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , /* BB how does SMB2 do case sensitive? */ /* if (tcon->nocase) hdr->Flags |= SMBFLG_CASELESS; */ - /* if (tcon->ses && tcon->ses->server && + if (tcon->ses && tcon->ses->server && (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) - hdr->Flags |= SMB2_FLAGS_SIGNED; */ + hdr->Flags |= SMB2_FLAGS_SIGNED; out: pdu->StructureSize2 = cpu_to_le16(parmsize); return; @@ -441,6 +441,38 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) rc = -EIO; goto neg_exit; } + + cFYI(1, "sec_flags 0x%x", sec_flags); + if (sec_flags & CIFSSEC_MUST_SIGN) { + cFYI(1, "Signing required"); + if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | + SMB2_NEGOTIATE_SIGNING_ENABLED))) { + cERROR(1, "signing required but server lacks support"); + rc = -EOPNOTSUPP; + goto neg_exit; + } + server->sec_mode |= SECMODE_SIGN_REQUIRED; + } else if (sec_flags & CIFSSEC_MAY_SIGN) { + cFYI(1, "Signing optional"); + if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { + cFYI(1, "Server requires signing"); + server->sec_mode |= SECMODE_SIGN_REQUIRED; + } else { + server->sec_mode &= + ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); + } + } else { + cFYI(1, "Signing disabled"); + if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { + cERROR(1, "Server requires packet signing to be enabled" + " in /proc/fs/cifs/SecurityFlags."); + rc = -EOPNOTSUPP; + goto neg_exit; + } + server->sec_mode &= + ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); + } + #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ rc = decode_neg_token_init(security_blob, blob_length, &server->sec_type); @@ -669,6 +701,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) /* since no tcon, smb2_init can not do this, so do here */ req->hdr.SessionId = ses->Suid; + if (server->sec_mode & SECMODE_SIGN_REQUIRED) + req->hdr.Flags |= SMB2_FLAGS_SIGNED; rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); /* @@ -1268,10 +1302,16 @@ smb2_readv_callback(struct mid_q_entry *mid) case MID_RESPONSE_RECEIVED: credits_received = le16_to_cpu(buf->CreditRequest); /* result already set, check signature */ - /* if (server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - if (smb2_verify_signature(mid->resp_buf, server)) - cERROR(1, "Unexpected SMB signature"); */ + if (server->sec_mode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + int rc; + + rc = smb2_verify_signature2(rdata->iov, rdata->nr_iov, + server); + if (rc) + cERROR(1, "SMB signature verification returned " + "error = %d", rc); + } /* FIXME: should this be counted toward the initiating task? */ task_io_account_read(rdata->bytes); cifs_stats_bytes_read(tcon, rdata->bytes); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 192d0c7d91eb..dbbdc39fa209 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -39,6 +39,8 @@ extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); extern __le16 *cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb); +extern int smb2_verify_signature2(struct kvec *, unsigned int, + struct TCP_Server_Info *); extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 31f5d420b3ea..66479f252ae5 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -36,6 +36,149 @@ #include "smb2proto.h" #include "cifs_debug.h" #include "smb2status.h" +#include "smb2glob.h" + +static int +smb2_calc_signature2(const struct kvec *iov, int n_vec, + struct TCP_Server_Info *server) +{ + int i, rc; + unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; + unsigned char *sigptr = smb2_signature; + struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + + memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); + memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); + + rc = crypto_shash_setkey(server->secmech.hmacsha256, + server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + if (rc) { + cERROR(1, "%s: Could not update with response\n", __func__); + return rc; + } + + rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + if (rc) { + cERROR(1, "%s: Could not init md5\n", __func__); + return rc; + } + + for (i = 0; i < n_vec; i++) { + if (iov[i].iov_len == 0) + continue; + if (iov[i].iov_base == NULL) { + cERROR(1, "null iovec entry"); + return -EIO; + } + /* + * The first entry includes a length field (which does not get + * signed that occupies the first 4 bytes before the header). + */ + if (i == 0) { + if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ + break; /* nothing to sign or corrupt header */ + rc = + crypto_shash_update( + &server->secmech.sdeschmacsha256->shash, + iov[i].iov_base + 4, iov[i].iov_len - 4); + } else { + rc = + crypto_shash_update( + &server->secmech.sdeschmacsha256->shash, + iov[i].iov_base, iov[i].iov_len); + } + if (rc) { + cERROR(1, "%s: Could not update with payload\n", + __func__); + return rc; + } + } + + rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, + sigptr); + if (rc) + cERROR(1, "%s: Could not generate sha256 hash\n", __func__); + + memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE); + + return rc; +} + +/* must be called with server->srv_mutex held */ +static int +smb2_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server) +{ + int rc = 0; + struct smb2_hdr *smb2_pdu = iov[0].iov_base; + + if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) || + server->tcpStatus == CifsNeedNegotiate) + return rc; + + if (!server->session_estab) { + strncpy(smb2_pdu->Signature, "BSRSPYL", 8); + return rc; + } + + rc = smb2_calc_signature2(iov, n_vec, server); + + return rc; +} + +int +smb2_verify_signature2(struct kvec *iov, unsigned int n_vec, + struct TCP_Server_Info *server) +{ + unsigned int rc; + char server_response_sig[16]; + struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + + if ((smb2_pdu->Command == SMB2_NEGOTIATE) || + (smb2_pdu->Command == SMB2_OPLOCK_BREAK) || + (!server->session_estab)) + return 0; + + /* + * BB what if signatures are supposed to be on for session but + * server does not send one? BB + */ + + /* Do not need to verify session setups with signature "BSRSPYL " */ + if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0) + cFYI(1, "dummy signature received for smb command 0x%x", + smb2_pdu->Command); + + /* + * Save off the origiginal signature so we can modify the smb and check + * our calculated signature against what the server sent. + */ + memcpy(server_response_sig, smb2_pdu->Signature, SMB2_SIGNATURE_SIZE); + + memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE); + + mutex_lock(&server->srv_mutex); + rc = smb2_calc_signature2(iov, n_vec, server); + mutex_unlock(&server->srv_mutex); + + if (rc) + return rc; + + if (memcmp(server_response_sig, smb2_pdu->Signature, + SMB2_SIGNATURE_SIZE)) + return -EACCES; + else + return 0; +} + +static int +smb2_verify_signature(struct smb2_hdr *smb2_pdu, struct TCP_Server_Info *server) +{ + struct kvec iov; + + iov.iov_base = (char *)smb2_pdu; + iov.iov_len = get_rfc1002_length(smb2_pdu) + 4; + return smb2_verify_signature2(&iov, 1, server); +} /* * Set message id for the request. Should be called after wait_for_free_request @@ -118,12 +261,15 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, dump_smb(mid->resp_buf, min_t(u32, 80, len)); /* convert the length into a more usable form */ - /* BB - uncomment with SMB2 signing implementation */ - /* if ((len > 24) && + if ((len > 24) && (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { - if (smb2_verify_signature(mid->resp_buf, server)) - cERROR(1, "Unexpected SMB signature"); - } */ + int rc; + + rc = smb2_verify_signature(mid->resp_buf, server); + if (rc) + cERROR(1, "SMB signature verification returned error = " + "%d", rc); + } return map_smb2_to_linux_error(mid->resp_buf, log_error); } @@ -141,9 +287,9 @@ smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, rc = smb2_get_mid_entry(ses, hdr, &mid); if (rc) return rc; - /* rc = smb2_sign_smb2(iov, nvec, ses->server); + rc = smb2_sign_smb2(iov, nvec, ses->server); if (rc) - delete_mid(mid); */ + cifs_delete_mid(mid); *ret_mid = mid; return rc; } @@ -162,11 +308,12 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, if (mid == NULL) return -ENOMEM; - /* rc = smb2_sign_smb2(iov, nvec, server); + rc = smb2_sign_smb2(iov, nvec, server); if (rc) { DeleteMidQEntry(mid); return rc; - }*/ + } + *ret_mid = mid; return rc; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index d9b639b95fa8..c4d7825dfc0a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -109,8 +109,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) mempool_free(midEntry, cifs_mid_poolp); } -static void -delete_mid(struct mid_q_entry *mid) +void +cifs_delete_mid(struct mid_q_entry *mid) { spin_lock(&GlobalMid_Lock); list_del(&mid->qhead); @@ -419,7 +419,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, if (rc == 0) return 0; - delete_mid(mid); + cifs_delete_mid(mid); add_credits(server, 1, optype); wake_up(&server->request_q); return rc; @@ -532,7 +532,7 @@ cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, return rc; rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number); if (rc) - delete_mid(mid); + cifs_delete_mid(mid); *ret_mid = mid; return rc; } @@ -652,11 +652,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, rc = ses->server->ops->check_receive(midQ, ses->server, flags & CIFS_LOG_ERROR); - /* mark it so buf will not be freed by delete_mid */ + /* mark it so buf will not be freed by cifs_delete_mid */ if ((flags & CIFS_NO_RESP) == 0) midQ->resp_buf = NULL; out: - delete_mid(midQ); + cifs_delete_mid(midQ); add_credits(ses->server, credits, optype); return rc; @@ -762,7 +762,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); rc = cifs_check_receive(midQ, ses->server, 0); out: - delete_mid(midQ); + cifs_delete_mid(midQ); add_credits(ses->server, 1, 0); return rc; @@ -846,7 +846,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); if (rc) { - delete_mid(midQ); + cifs_delete_mid(midQ); mutex_unlock(&ses->server->srv_mutex); return rc; } @@ -859,7 +859,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, mutex_unlock(&ses->server->srv_mutex); if (rc < 0) { - delete_mid(midQ); + cifs_delete_mid(midQ); return rc; } @@ -880,7 +880,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, blocking lock to return. */ rc = send_cancel(ses->server, in_buf, midQ); if (rc) { - delete_mid(midQ); + cifs_delete_mid(midQ); return rc; } } else { @@ -892,7 +892,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, /* If we get -ENOLCK back the lock may have already been removed. Don't exit in this case. */ if (rc && rc != -ENOLCK) { - delete_mid(midQ); + cifs_delete_mid(midQ); return rc; } } @@ -929,7 +929,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); rc = cifs_check_receive(midQ, ses->server, 0); out: - delete_mid(midQ); + cifs_delete_mid(midQ); if (rstart && rc == -EACCES) return -ERESTARTSYS; return rc; -- cgit v1.2.3 From 8ceb984379462f94bdebef3288d569c6e1f912ea Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:30 -0700 Subject: CIFS: Move rename to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 ++ fs/cifs/cifsproto.h | 5 ++- fs/cifs/cifssmb.c | 22 +++++++------ fs/cifs/inode.c | 92 ++++++++++++++++++++++++++++------------------------- fs/cifs/smb1ops.c | 1 + 5 files changed, 66 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6217df707909..a0105c547ffd 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -275,6 +275,9 @@ struct smb_version_operations { /* open, rename and delete file */ int (*rename_pending_delete)(const char *, struct dentry *, const unsigned int); + /* send rename request */ + int (*rename)(const unsigned int, struct cifs_tcon *, const char *, + const char *, struct cifs_sb_info *); /* open a file for non-posix mounts */ int (*open)(const unsigned int, struct cifs_tcon *, const char *, int, int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *, diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 541738fcaebc..eecd233c6912 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -310,9 +310,8 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, int netfid, const char *target_name, const struct nls_table *nls_codepage, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 733119dad493..b8cd335d6f11 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2531,8 +2531,8 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, int remap) + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { int rc = 0; RENAME_REQ *pSMB = NULL; @@ -2540,6 +2540,7 @@ CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len, name_len2; __u16 count; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSSMBRename"); renameRetry: @@ -2554,9 +2555,9 @@ renameRetry: ATTR_DIRECTORY); if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = - cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->OldFileName, + from_name, PATH_MAX, + cifs_sb->local_nls, remap); name_len++; /* trailing null */ name_len *= 2; pSMB->OldFileName[name_len] = 0x04; /* pad */ @@ -2564,17 +2565,18 @@ renameRetry: pSMB->OldFileName[name_len + 1] = 0x00; name_len2 = cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], - toName, PATH_MAX, nls_codepage, remap); + to_name, PATH_MAX, cifs_sb->local_nls, + remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fromName, PATH_MAX); + name_len = strnlen(from_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->OldFileName, fromName, name_len); - name_len2 = strnlen(toName, PATH_MAX); + strncpy(pSMB->OldFileName, from_name, name_len); + name_len2 = strnlen(to_name, PATH_MAX); name_len2++; /* trailing null */ pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ - strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); + strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); name_len2++; /* trailing null */ name_len2++; /* signature byte */ } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index c6f6b02cf3b5..2f3235f08c3f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1512,29 +1512,32 @@ rmdir_exit: } static int -cifs_do_rename(unsigned int xid, struct dentry *from_dentry, - const char *fromPath, struct dentry *to_dentry, - const char *toPath) +cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, + const char *from_path, struct dentry *to_dentry, + const char *to_path) { struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; __u16 srcfid; int oplock, rc; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + + if (!server->ops->rename) + return -ENOSYS; /* try path-based rename first */ - rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = server->ops->rename(xid, tcon, from_path, to_path, cifs_sb); /* - * don't bother with rename by filehandle unless file is busy and - * source Note that cross directory moves do not work with + * Don't bother with rename by filehandle unless file is busy and + * source. Note that cross directory moves do not work with * rename by filehandle to various Windows servers. */ if (rc == 0 || rc != -ETXTBSY) @@ -1545,29 +1548,28 @@ cifs_do_rename(unsigned int xid, struct dentry *from_dentry, goto do_rename_exit; /* open the file to be renamed -- we need DELETE perms */ - rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, + rc = CIFSSMBOpen(xid, tcon, from_path, FILE_OPEN, DELETE, CREATE_NOT_DIR, &srcfid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, + rc = CIFSSMBRenameOpenFile(xid, tcon, srcfid, (const char *) to_dentry->d_name.name, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - - CIFSSMBClose(xid, pTcon, srcfid); + CIFSSMBClose(xid, tcon, srcfid); } do_rename_exit: cifs_put_tlink(tlink); return rc; } -int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, - struct inode *target_dir, struct dentry *target_dentry) +int +cifs_rename(struct inode *source_dir, struct dentry *source_dentry, + struct inode *target_dir, struct dentry *target_dentry) { - char *fromName = NULL; - char *toName = NULL; + char *from_name = NULL; + char *to_name = NULL; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; @@ -1588,25 +1590,25 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, * we already have the rename sem so we do not need to * grab it again here to protect the path integrity */ - fromName = build_path_from_dentry(source_dentry); - if (fromName == NULL) { + from_name = build_path_from_dentry(source_dentry); + if (from_name == NULL) { rc = -ENOMEM; goto cifs_rename_exit; } - toName = build_path_from_dentry(target_dentry); - if (toName == NULL) { + to_name = build_path_from_dentry(target_dentry); + if (to_name == NULL) { rc = -ENOMEM; goto cifs_rename_exit; } - rc = cifs_do_rename(xid, source_dentry, fromName, - target_dentry, toName); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, + to_name); if (rc == -EEXIST && tcon->unix_ext) { /* - * Are src and dst hardlinks of same inode? We can - * only tell with unix extensions enabled + * Are src and dst hardlinks of same inode? We can only tell + * with unix extensions enabled. */ info_buf_source = kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), @@ -1617,19 +1619,19 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, } info_buf_target = info_buf_source + 1; - tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, - info_buf_source, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, from_name, + info_buf_source, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc != 0) goto unlink_target; - tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName, - info_buf_target, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, to_name, + info_buf_target, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == info_buf_target->UniqueId)) { @@ -1637,8 +1639,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, rc = 0; goto cifs_rename_exit; } - } /* else ... BB we could add the same check for Windows by - checking the UniqueId via FILE_INTERNAL_INFO */ + } + /* + * else ... BB we could add the same check for Windows by + * checking the UniqueId via FILE_INTERNAL_INFO + */ unlink_target: /* Try unlinking the target dentry if it's not negative */ @@ -1646,15 +1651,14 @@ unlink_target: tmprc = cifs_unlink(target_dir, target_dentry); if (tmprc) goto cifs_rename_exit; - - rc = cifs_do_rename(xid, source_dentry, fromName, - target_dentry, toName); + rc = cifs_do_rename(xid, source_dentry, from_name, + target_dentry, to_name); } cifs_rename_exit: kfree(info_buf_source); - kfree(fromName); - kfree(toName); + kfree(from_name); + kfree(to_name); free_xid(xid); cifs_put_tlink(tlink); return rc; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index aa55c2fbf793..377392003655 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -800,6 +800,7 @@ struct smb_version_operations smb1_operations = { .rmdir = CIFSSMBRmDir, .unlink = CIFSSMBDelFile, .rename_pending_delete = cifs_rename_pending_delete, + .rename = CIFSSMBRename, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, -- cgit v1.2.3 From 35143eb5c2e3ae6c91b29144449d23f05f573796 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:31 -0700 Subject: CIFS: Add SMB2 support for rename operation Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 25 ++++++++++++ fs/cifs/smb2ops.c | 1 + fs/cifs/smb2pdu.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 28 ++++++++++++++ fs/cifs/smb2proto.h | 6 +++ 5 files changed, 167 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index ee3a1ef686dd..a6952bafe331 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -74,6 +74,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, * SMB2_open() call. */ break; + case SMB2_OP_RENAME: + tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid, + (__le16 *)data); + break; default: cERROR(1, "Invalid command"); break; @@ -170,3 +174,24 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, 0, CREATE_DELETE_ON_CLOSE, NULL, SMB2_OP_DELETE); } + +int +smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) +{ + __le16 *smb2_to_name = NULL; + int rc; + + smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb); + if (smb2_to_name == NULL) { + rc = -ENOMEM; + goto smb2_rename_path; + } + + rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, DELETE, + FILE_OPEN, 0, 0, smb2_to_name, SMB2_OP_RENAME); +smb2_rename_path: + kfree(smb2_to_name); + return rc; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f9c3dbee9010..5aaccb0f3aae 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -451,6 +451,7 @@ struct smb_version_operations smb21_operations = { .mkdir_setinfo = smb2_mkdir_setinfo, .rmdir = smb2_rmdir, .unlink = smb2_unlink, + .rename = smb2_rename_path, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 30c92c847fe8..1dc11ce7934e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1602,3 +1602,110 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, } return rc; } + +static int +send_set_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, int info_class, + unsigned int num, void **data, unsigned int *size) +{ + struct smb2_set_info_req *req; + struct smb2_set_info_rsp *rsp = NULL; + struct kvec *iov; + int rc = 0; + int resp_buftype; + unsigned int i; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + if (!num) + return -EINVAL; + + iov = kmalloc(sizeof(struct kvec) * num, GFP_KERNEL); + if (!iov) + return -ENOMEM; + + rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req); + if (rc) { + kfree(iov); + return rc; + } + + req->InfoType = SMB2_O_INFO_FILE; + req->FileInfoClass = info_class; + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + /* 4 for RFC1001 length and 1 for Buffer */ + req->BufferOffset = + cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4); + req->BufferLength = cpu_to_le32(*size); + + inc_rfc1001_len(req, *size - 1 /* Buffer */); + + memcpy(req->Buffer, *data, *size); + + iov[0].iov_base = (char *)req; + /* 4 for RFC1001 length */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + for (i = 1; i < num; i++) { + inc_rfc1001_len(req, size[i]); + le32_add_cpu(&req->BufferLength, size[i]); + iov[i].iov_base = (char *)data[i]; + iov[i].iov_len = size[i]; + } + + rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); + rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; + + if (rc != 0) { + cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); + goto out; + } + + if (rsp == NULL) { + rc = -EIO; + goto out; + } + +out: + free_rsp_buf(resp_buftype, rsp); + kfree(iov); + return rc; +} + +int +SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, __le16 *target_file) +{ + struct smb2_file_rename_info info; + void **data; + unsigned int size[2]; + int rc; + int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX)); + + data = kmalloc(sizeof(void *) * 2, GFP_KERNEL); + if (!data) + return -ENOMEM; + + info.ReplaceIfExists = 1; /* 1 = replace existing target with new */ + /* 0 = fail if target already exists */ + info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */ + info.FileNameLength = cpu_to_le32(len); + + data[0] = &info; + size[0] = sizeof(struct smb2_file_rename_info); + + data[1] = target_file; + size[1] = len + 2 /* null */; + + rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, + FILE_RENAME_INFORMATION, 2, data, size); + kfree(data); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 21ec9ed280f9..b03ca37d0d58 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -568,6 +568,25 @@ struct smb2_query_info_rsp { __u8 Buffer[1]; } __packed; +struct smb2_set_info_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 33 */ + __u8 InfoType; + __u8 FileInfoClass; + __le32 BufferLength; + __le16 BufferOffset; + __u16 Reserved; + __le32 AdditionalInformation; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __u8 Buffer[1]; +} __packed; + +struct smb2_set_info_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 2 */ +} __packed; + /* * PDU infolevel structure definitions * BB consider moving to a different header @@ -625,6 +644,15 @@ struct smb2_file_internal_info { __le64 IndexNumber; } __packed; /* level 6 Query */ +struct smb2_file_rename_info { /* encoding of request for level 10 */ + __u8 ReplaceIfExists; /* 1 = replace existing target with new */ + /* 0 = fail if target already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + char FileName[0]; /* New name to be assigned */ +} __packed; /* level 10 Set */ + /* * This level 18, although with struct with same name is different from cifs * level 0x107. Level 0x107 has an extra u64 between AccessFlags and diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index dbbdc39fa209..b43036e8ad2a 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -65,6 +65,9 @@ extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); +extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, int disposition, @@ -106,5 +109,8 @@ extern int smb2_async_writev(struct cifs_writedata *wdata); extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, struct kvec *iov, int n_vec); extern int SMB2_echo(struct TCP_Server_Info *server); +extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + __le16 *target_file); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From d6e906f1b571d15ff5778a049802f6ef6f70159a Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 18 Sep 2012 16:20:31 -0700 Subject: CIFS: Move hardlink to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 4 +++ fs/cifs/cifsproto.h | 8 +++--- fs/cifs/cifssmb.c | 20 ++++++++------- fs/cifs/link.c | 74 +++++++++++++++++++++++++++++++---------------------- fs/cifs/smb1ops.c | 1 + 5 files changed, 63 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a0105c547ffd..8595d498ad80 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -278,6 +278,10 @@ struct smb_version_operations { /* send rename request */ int (*rename)(const unsigned int, struct cifs_tcon *, const char *, const char *, struct cifs_sb_info *); + /* send create hardlink request */ + int (*create_hardlink)(const unsigned int, struct cifs_tcon *, + const char *, const char *, + struct cifs_sb_info *); /* open a file for non-posix mounts */ int (*open)(const unsigned int, struct cifs_tcon *, const char *, int, int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *, diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index eecd233c6912..3c50555c7735 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -316,11 +316,9 @@ extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, int netfid, const char *target_name, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSCreateHardLink(const unsigned int xid, - struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, - int remap_special_chars); +extern int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b8cd335d6f11..eb3d2cf76e6e 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2924,8 +2924,8 @@ createHardLinkRetry: int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, int remap) + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { int rc = 0; NT_RENAME_REQ *pSMB = NULL; @@ -2933,6 +2933,7 @@ CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len, name_len2; __u16 count; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSCreateHardLink"); winCreateHardLinkRetry: @@ -2952,8 +2953,8 @@ winCreateHardLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->OldFileName, from_name, + PATH_MAX, cifs_sb->local_nls, remap); name_len++; /* trailing null */ name_len *= 2; @@ -2962,17 +2963,18 @@ winCreateHardLinkRetry: pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ name_len2 = cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], - toName, PATH_MAX, nls_codepage, remap); + to_name, PATH_MAX, cifs_sb->local_nls, + remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fromName, PATH_MAX); + name_len = strnlen(from_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->OldFileName, fromName, name_len); - name_len2 = strnlen(toName, PATH_MAX); + strncpy(pSMB->OldFileName, from_name, name_len); + name_len2 = strnlen(to_name, PATH_MAX); name_len2++; /* trailing null */ pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ - strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); + strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); name_len2++; /* trailing null */ name_len2++; /* signature byte */ } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index e6ce3b112875..51dc2fb6e854 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -391,72 +391,86 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, { int rc = -EACCES; unsigned int xid; - char *fromName = NULL; - char *toName = NULL; + char *from_name = NULL; + char *to_name = NULL; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct cifsInodeInfo *cifsInode; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); xid = get_xid(); - fromName = build_path_from_dentry(old_file); - toName = build_path_from_dentry(direntry); - if ((fromName == NULL) || (toName == NULL)) { + from_name = build_path_from_dentry(old_file); + to_name = build_path_from_dentry(direntry); + if ((from_name == NULL) || (to_name == NULL)) { rc = -ENOMEM; goto cifs_hl_exit; } - if (pTcon->unix_ext) - rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName, + if (tcon->unix_ext) + rc = CIFSUnixCreateHardLink(xid, tcon, from_name, to_name, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else { - rc = CIFSCreateHardLink(xid, pTcon, fromName, toName, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + server = tcon->ses->server; + if (!server->ops->create_hardlink) + return -ENOSYS; + rc = server->ops->create_hardlink(xid, tcon, from_name, to_name, + cifs_sb); if ((rc == -EIO) || (rc == -EINVAL)) rc = -EOPNOTSUPP; } d_drop(direntry); /* force new lookup from server of target */ - /* if source file is cached (oplocked) revalidate will not go to server - until the file is closed or oplock broken so update nlinks locally */ + /* + * if source file is cached (oplocked) revalidate will not go to server + * until the file is closed or oplock broken so update nlinks locally + */ if (old_file->d_inode) { cifsInode = CIFS_I(old_file->d_inode); if (rc == 0) { spin_lock(&old_file->d_inode->i_lock); inc_nlink(old_file->d_inode); spin_unlock(&old_file->d_inode->i_lock); -/* BB should we make this contingent on superblock flag NOATIME? */ -/* old_file->d_inode->i_ctime = CURRENT_TIME;*/ - /* parent dir timestamps will update from srv - within a second, would it really be worth it - to set the parent dir cifs inode time to zero - to force revalidate (faster) for it too? */ + /* + * BB should we make this contingent on superblock flag + * NOATIME? + */ + /* old_file->d_inode->i_ctime = CURRENT_TIME; */ + /* + * parent dir timestamps will update from srv within a + * second, would it really be worth it to set the parent + * dir cifs inode time to zero to force revalidate + * (faster) for it too? + */ } - /* if not oplocked will force revalidate to get info - on source file from srv */ + /* + * if not oplocked will force revalidate to get info on source + * file from srv + */ cifsInode->time = 0; - /* Will update parent dir timestamps from srv within a second. - Would it really be worth it to set the parent dir (cifs - inode) time field to zero to force revalidate on parent - directory faster ie - CIFS_I(inode)->time = 0; */ + /* + * Will update parent dir timestamps from srv within a second. + * Would it really be worth it to set the parent dir (cifs + * inode) time field to zero to force revalidate on parent + * directory faster ie + * + * CIFS_I(inode)->time = 0; + */ } cifs_hl_exit: - kfree(fromName); - kfree(toName); + kfree(from_name); + kfree(to_name); free_xid(xid); cifs_put_tlink(tlink); return rc; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 377392003655..e5d63444f0ff 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -801,6 +801,7 @@ struct smb_version_operations smb1_operations = { .unlink = CIFSSMBDelFile, .rename_pending_delete = cifs_rename_pending_delete, .rename = CIFSSMBRename, + .create_hardlink = CIFSCreateHardLink, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, -- cgit v1.2.3 From 568798cc6211553e2494a6876fa19d064c822e79 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:31 -0700 Subject: CIFS: Add SMB2 support for hardlink operation Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2glob.h | 1 + fs/cifs/smb2inode.c | 34 ++++++++++++++++++++++++++++------ fs/cifs/smb2ops.c | 1 + fs/cifs/smb2pdu.c | 31 +++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 9 +++++++++ fs/cifs/smb2proto.h | 6 ++++++ 6 files changed, 76 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 8635574ea8b6..21555d8744fd 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -40,6 +40,7 @@ #define SMB2_OP_MKDIR 5 #define SMB2_OP_RENAME 6 #define SMB2_OP_DELETE 7 +#define SMB2_OP_HARDLINK 8 /* Used when constructing chained read requests. */ #define CHAINED_REQUEST 1 diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index a6952bafe331..1921c9c87ccd 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -78,6 +78,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid, (__le16 *)data); break; + case SMB2_OP_HARDLINK: + tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid, + volatile_fid, (__le16 *)data); + break; default: cERROR(1, "Invalid command"); break; @@ -175,10 +179,10 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, SMB2_OP_DELETE); } -int -smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb) +static int +smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb, __u32 access, int command) { __le16 *smb2_to_name = NULL; int rc; @@ -189,9 +193,27 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, goto smb2_rename_path; } - rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, DELETE, - FILE_OPEN, 0, 0, smb2_to_name, SMB2_OP_RENAME); + rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access, + FILE_OPEN, 0, 0, smb2_to_name, command); smb2_rename_path: kfree(smb2_to_name); return rc; } + +int +smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, + DELETE, SMB2_OP_RENAME); +} + +int +smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, + FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK); +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 5aaccb0f3aae..75693e983e76 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -452,6 +452,7 @@ struct smb_version_operations smb21_operations = { .rmdir = smb2_rmdir, .unlink = smb2_unlink, .rename = smb2_rename_path, + .create_hardlink = smb2_create_hardlink, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1dc11ce7934e..a684c4ab42d6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1709,3 +1709,34 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, kfree(data); return rc; } + +int +SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, __le16 *target_file) +{ + struct smb2_file_link_info info; + void **data; + unsigned int size[2]; + int rc; + int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX)); + + data = kmalloc(sizeof(void *) * 2, GFP_KERNEL); + if (!data) + return -ENOMEM; + + info.ReplaceIfExists = 0; /* 1 = replace existing link with new */ + /* 0 = fail if link already exists */ + info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */ + info.FileNameLength = cpu_to_le32(len); + + data[0] = &info; + size[0] = sizeof(struct smb2_file_link_info); + + data[1] = target_file; + size[1] = len + 2 /* null */; + + rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, + FILE_LINK_INFORMATION, 2, data, size); + kfree(data); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index b03ca37d0d58..0f3c4828cd00 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -653,6 +653,15 @@ struct smb2_file_rename_info { /* encoding of request for level 10 */ char FileName[0]; /* New name to be assigned */ } __packed; /* level 10 Set */ +struct smb2_file_link_info { /* encoding of request for level 11 */ + __u8 ReplaceIfExists; /* 1 = replace existing link with new */ + /* 0 = fail if link already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + char FileName[0]; /* Name to be assigned to new link */ +} __packed; /* level 11 Set */ + /* * This level 18, although with struct with same name is different from cifs * level 0x107. Level 0x107 has an extra u64 between AccessFlags and diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index b43036e8ad2a..99f6945c8643 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -68,6 +68,9 @@ extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); +extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, int disposition, @@ -112,5 +115,8 @@ extern int SMB2_echo(struct TCP_Server_Info *server); extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le16 *target_file); +extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + __le16 *target_file); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From d143341815bdc7c45d5289a3ab5743c838332518 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:31 -0700 Subject: CIFS: Move set_file_size to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 6 +++ fs/cifs/cifsproto.h | 10 ++--- fs/cifs/cifssmb.c | 40 ++++++++++---------- fs/cifs/inode.c | 106 +++++++++++++++++++++++++++------------------------- fs/cifs/smb1ops.c | 2 + 5 files changed, 89 insertions(+), 75 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8595d498ad80..803c21218633 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -252,6 +252,12 @@ struct smb_version_operations { int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, u64 *uniqueid, FILE_ALL_INFO *); + /* set size by path */ + int (*set_path_size)(const unsigned int, struct cifs_tcon *, + const char *, __u64, struct cifs_sb_info *, bool); + /* set size by file handle */ + int (*set_file_size)(const unsigned int, struct cifs_tcon *, + struct cifsFileInfo *, __u64, bool); /* build a full path to the root of the mount */ char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, struct cifs_tcon *); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 3c50555c7735..3d99fe9afccc 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -270,13 +270,11 @@ extern int CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nls_codepage); #endif /* possibly unneeded function */ extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, - const char *fileName, __u64 size, - bool setAllocationSizeFlag, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *file_name, __u64 size, + struct cifs_sb_info *cifs_sb, bool set_allocation); extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, - __u64 size, __u16 fileHandle, __u32 opener_pid, - bool AllocSizeFlag); + struct cifsFileInfo *cfile, __u64 size, + bool set_allocation); struct cifs_unix_set_info_args { __u64 ctime; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index eb3d2cf76e6e..c4f43cf671dc 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -5395,16 +5395,16 @@ QFSPosixRetry: } -/* We can not use write of zero bytes trick to - set file size due to need for large file support. Also note that - this SetPathInfo is preferred to SetFileInfo based method in next - routine which is only needed to work around a sharing violation bug - in Samba which this routine can run into */ - +/* + * We can not use write of zero bytes trick to set file size due to need for + * large file support. Also note that this SetPathInfo is preferred to + * SetFileInfo based method in next routine which is only needed to work around + * a sharing violation bugin Samba which this routine can run into. + */ int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, - const char *fileName, __u64 size, bool SetAllocation, - const struct nls_table *nls_codepage, int remap) + const char *file_name, __u64 size, struct cifs_sb_info *cifs_sb, + bool set_allocation) { struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; @@ -5412,6 +5412,8 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, int name_len; int rc = 0; int bytes_returned = 0; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + __u16 params, byte_count, data_count, param_offset, offset; cFYI(1, "In SetEOF"); @@ -5423,14 +5425,14 @@ SetEOFRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name, + PATH_MAX, cifs_sb->local_nls, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); + name_len = strnlen(file_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + strncpy(pSMB->FileName, file_name, name_len); } params = 6 + name_len; data_count = sizeof(struct file_end_of_file_info); @@ -5444,7 +5446,7 @@ SetEOFRetry: param_offset = offsetof(struct smb_com_transaction2_spi_req, InformationLevel) - 4; offset = param_offset + params; - if (SetAllocation) { + if (set_allocation) { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); @@ -5491,8 +5493,8 @@ SetEOFRetry: } int -CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, - __u16 fid, __u32 pid_of_opener, bool SetAllocation) +CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile, __u64 size, bool set_allocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct file_end_of_file_info *parm_data; @@ -5506,8 +5508,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, if (rc) return rc; - pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); - pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); + pSMB->hdr.Pid = cpu_to_le16((__u16)cfile->pid); + pSMB->hdr.PidHigh = cpu_to_le16((__u16)(cfile->pid >> 16)); params = 6; pSMB->MaxSetupCount = 0; @@ -5536,8 +5538,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, + offset); pSMB->DataOffset = cpu_to_le16(offset); parm_data->FileSize = cpu_to_le64(size); - pSMB->Fid = fid; - if (SetAllocation) { + pSMB->Fid = cfile->fid.netfid; + if (set_allocation) { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2f3235f08c3f..85e1b0a405a8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1883,7 +1883,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, struct cifsInodeInfo *cifsInode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; - struct cifs_tcon *pTcon = NULL; + struct cifs_tcon *tcon = NULL; + struct TCP_Server_Info *server; struct cifs_io_parms io_parms; /* @@ -1897,19 +1898,21 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, */ open_file = find_writable_file(cifsInode, true); if (open_file) { - __u16 nfid = open_file->fid.netfid; - __u32 npid = open_file->pid; - pTcon = tlink_tcon(open_file->tlink); - rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, - npid, false); + tcon = tlink_tcon(open_file->tlink); + server = tcon->ses->server; + if (server->ops->set_file_size) + rc = server->ops->set_file_size(xid, tcon, open_file, + attrs->ia_size, false); + else + rc = -ENOSYS; cifsFileInfo_put(open_file); cFYI(1, "SetFSize for attrs rc = %d", rc); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { unsigned int bytes_written; - io_parms.netfid = nfid; - io_parms.pid = npid; - io_parms.tcon = pTcon; + io_parms.netfid = open_file->fid.netfid; + io_parms.pid = open_file->pid; + io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = attrs->ia_size; rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, @@ -1919,52 +1922,55 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, } else rc = -EINVAL; - if (rc != 0) { - if (pTcon == NULL) { - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) - return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); - } + if (!rc) + goto set_size_out; - /* Set file size by pathname rather than by handle - either because no valid, writeable file handle for - it was found or because there was an error setting - it by handle */ - rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, - false, cifs_sb->local_nls, + if (tcon == NULL) { + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + } + + /* + * Set file size by pathname rather than by handle either because no + * valid, writeable file handle for it was found or because there was + * an error setting it by handle. + */ + if (server->ops->set_path_size) + rc = server->ops->set_path_size(xid, tcon, full_path, + attrs->ia_size, cifs_sb, false); + else + rc = -ENOSYS; + cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); + if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { + __u16 netfid; + int oplock = 0; + + rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN, + GENERIC_WRITE, CREATE_NOT_DIR, &netfid, + &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - __u16 netfid; - int oplock = 0; - - rc = SMBLegacyOpen(xid, pTcon, full_path, - FILE_OPEN, GENERIC_WRITE, - CREATE_NOT_DIR, &netfid, &oplock, NULL, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - unsigned int bytes_written; - - io_parms.netfid = netfid; - io_parms.pid = current->tgid; - io_parms.tcon = pTcon; - io_parms.offset = 0; - io_parms.length = attrs->ia_size; - rc = CIFSSMBWrite(xid, &io_parms, - &bytes_written, - NULL, NULL, 1); - cFYI(1, "wrt seteof rc %d", rc); - CIFSSMBClose(xid, pTcon, netfid); - } + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == 0) { + unsigned int bytes_written; + + io_parms.netfid = netfid; + io_parms.pid = current->tgid; + io_parms.tcon = tcon; + io_parms.offset = 0; + io_parms.length = attrs->ia_size; + rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL, + NULL, 1); + cFYI(1, "wrt seteof rc %d", rc); + CIFSSMBClose(xid, tcon, netfid); } - if (tlink) - cifs_put_tlink(tlink); } + if (tlink) + cifs_put_tlink(tlink); +set_size_out: if (rc == 0) { cifsInode->server_eof = attrs->ia_size; cifs_setsize(inode, attrs->ia_size); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index e5d63444f0ff..b73d2750296d 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -793,6 +793,8 @@ struct smb_version_operations smb1_operations = { .query_path_info = cifs_query_path_info, .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, + .set_path_size = CIFSSMBSetEOF, + .set_file_size = CIFSSMBSetFileSize, .build_path_to_root = cifs_build_path_to_root, .echo = CIFSSMBEcho, .mkdir = CIFSSMBMkDir, -- cgit v1.2.3 From c839ff244ba2d54d0933596e29a4b03e3c846a9a Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:32 -0700 Subject: CIFS: Add SMB2 support for set_file_size Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2glob.h | 1 + fs/cifs/smb2inode.c | 15 +++++++++++++++ fs/cifs/smb2ops.c | 11 +++++++++++ fs/cifs/smb2pdu.c | 26 +++++++++++++++++++++++--- fs/cifs/smb2pdu.h | 4 ++++ fs/cifs/smb2proto.h | 6 ++++++ 6 files changed, 60 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 21555d8744fd..05d429b1c37e 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -41,6 +41,7 @@ #define SMB2_OP_RENAME 6 #define SMB2_OP_DELETE 7 #define SMB2_OP_HARDLINK 8 +#define SMB2_OP_SET_EOF 9 /* Used when constructing chained read requests. */ #define CHAINED_REQUEST 1 diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1921c9c87ccd..290583092293 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -82,6 +82,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid, volatile_fid, (__le16 *)data); break; + case SMB2_OP_SET_EOF: + tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid, + current->tgid, (__le64 *)data); + break; default: cERROR(1, "Invalid command"); break; @@ -217,3 +221,14 @@ smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK); } + +int +smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, __u64 size, + struct cifs_sb_info *cifs_sb, bool set_alloc) +{ + __le64 eof = cpu_to_le64(size); + return smb2_open_op_close(xid, tcon, cifs_sb, full_path, + FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof, + SMB2_OP_SET_EOF); +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 75693e983e76..e0daf3a4dcc2 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -415,6 +415,15 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, return SMB2_write(xid, parms, written, iov, nr_segs); } +static int +smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile, __u64 size, bool set_alloc) +{ + __le64 eof = cpu_to_le64(size); + return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, cfile->pid, &eof); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -446,6 +455,8 @@ struct smb_version_operations smb21_operations = { .query_path_info = smb2_query_path_info, .get_srv_inum = smb2_get_srv_inum, .query_file_info = smb2_query_file_info, + .set_path_size = smb2_set_path_size, + .set_file_size = smb2_set_file_size, .build_path_to_root = smb2_build_path_to_root, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a684c4ab42d6..74a8381400b1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1605,7 +1605,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, static int send_set_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, int info_class, + u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class, unsigned int num, void **data, unsigned int *size) { struct smb2_set_info_req *req; @@ -1635,6 +1635,8 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, return rc; } + req->hdr.ProcessId = cpu_to_le32(pid); + req->InfoType = SMB2_O_INFO_FILE; req->FileInfoClass = info_class; req->PersistentFileId = persistent_fid; @@ -1705,7 +1707,8 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, size[1] = len + 2 /* null */; rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, - FILE_RENAME_INFORMATION, 2, data, size); + current->tgid, FILE_RENAME_INFORMATION, 2, data, + size); kfree(data); return rc; } @@ -1736,7 +1739,24 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, size[1] = len + 2 /* null */; rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, - FILE_LINK_INFORMATION, 2, data, size); + current->tgid, FILE_LINK_INFORMATION, 2, data, size); kfree(data); return rc; } + +int +SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, + u64 volatile_fid, u32 pid, __le64 *eof) +{ + struct smb2_file_eof_info info; + void *data; + unsigned int size; + + info.EndOfFile = *eof; + + data = &info; + size = sizeof(struct smb2_file_eof_info); + + return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, + FILE_END_OF_FILE_INFORMATION, 1, &data, &size); +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 0f3c4828cd00..d775941f552a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -690,4 +690,8 @@ struct smb2_file_all_info { /* data block encoding of response to level 18 */ char FileName[1]; } __packed; /* level 18 Query */ +struct smb2_file_eof_info { /* encoding of request for level 10 */ + __le64 EndOfFile; /* new end of file value */ +} __packed; /* level 20 Set */ + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 99f6945c8643..3d4866279530 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -56,6 +56,9 @@ extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, bool *adjust_tz); +extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, __u64 size, + struct cifs_sb_info *cifs_sb, bool set_alloc); extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, @@ -118,5 +121,8 @@ extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le16 *target_file); +extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u32 pid, + __le64 *eof); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 6bdf6dbd662176c0da5c3ac8ed10ac94e7776c85 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:32 -0700 Subject: CIFS: Move set_file_info to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/inode.c | 79 +++++------------------------------------------------- fs/cifs/smb1ops.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 73 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 803c21218633..dff35830601f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -258,6 +258,9 @@ struct smb_version_operations { /* set size by file handle */ int (*set_file_size)(const unsigned int, struct cifs_tcon *, struct cifsFileInfo *, __u64, bool); + /* set attributes */ + int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, + const unsigned int); /* build a full path to the root of the mount */ char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, struct cifs_tcon *); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 85e1b0a405a8..e74e1bceb416 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -886,21 +886,18 @@ int cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, char *full_path, __u32 dosattr) { - int rc; - int oplock = 0; - __u16 netfid; - __u32 netpid; bool set_time = false; - struct cifsFileInfo *open_file; - struct cifsInodeInfo *cifsInode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct tcon_link *tlink = NULL; - struct cifs_tcon *pTcon; + struct TCP_Server_Info *server; FILE_BASIC_INFO info_buf; if (attrs == NULL) return -EINVAL; + server = cifs_sb_master_tcon(cifs_sb)->ses->server; + if (!server->ops->set_file_info) + return -ENOSYS; + if (attrs->ia_valid & ATTR_ATIME) { set_time = true; info_buf.LastAccessTime = @@ -931,71 +928,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, info_buf.CreationTime = 0; /* don't change */ info_buf.Attributes = cpu_to_le32(dosattr); - /* - * If the file is already open for write, just use that fileid - */ - open_file = find_writable_file(cifsInode, true); - if (open_file) { - netfid = open_file->fid.netfid; - netpid = open_file->pid; - pTcon = tlink_tcon(open_file->tlink); - goto set_via_filehandle; - } - - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; - } - pTcon = tlink_tcon(tlink); - - /* - * NT4 apparently returns success on this call, but it doesn't - * really work. - */ - if (!(pTcon->ses->flags & CIFS_SES_NT4)) { - rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, - &info_buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - cifsInode->cifsAttrs = dosattr; - goto out; - } else if (rc != -EOPNOTSUPP && rc != -EINVAL) - goto out; - } - - cFYI(1, "calling SetFileInfo since SetPathInfo for " - "times not supported by this server"); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, - CREATE_NOT_DIR, &netfid, &oplock, - NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - - if (rc != 0) { - if (rc == -EIO) - rc = -EINVAL; - goto out; - } - - netpid = current->tgid; - -set_via_filehandle: - rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); - if (!rc) - cifsInode->cifsAttrs = dosattr; - - if (open_file == NULL) - CIFSSMBClose(xid, pTcon, netfid); - else - cifsFileInfo_put(open_file); -out: - if (tlink != NULL) - cifs_put_tlink(tlink); - return rc; + return server->ops->set_file_info(inode, full_path, &info_buf, xid); } /* diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index b73d2750296d..ed311968437a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -758,6 +758,84 @@ cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, return CIFSSMBWrite2(xid, parms, written, iov, nr_segs); } +static int +smb_set_file_info(struct inode *inode, const char *full_path, + FILE_BASIC_INFO *buf, const unsigned int xid) +{ + int oplock = 0; + int rc; + __u16 netfid; + __u32 netpid; + struct cifsFileInfo *open_file; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct tcon_link *tlink = NULL; + struct cifs_tcon *tcon; + FILE_BASIC_INFO info_buf; + + /* if the file is already open for write, just use that fileid */ + open_file = find_writable_file(cinode, true); + if (open_file) { + netfid = open_file->fid.netfid; + netpid = open_file->pid; + tcon = tlink_tcon(open_file->tlink); + goto set_via_filehandle; + } + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + tlink = NULL; + goto out; + } + tcon = tlink_tcon(tlink); + + /* + * NT4 apparently returns success on this call, but it doesn't really + * work. + */ + if (!(tcon->ses->flags & CIFS_SES_NT4)) { + rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == 0) { + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + goto out; + } else if (rc != -EOPNOTSUPP && rc != -EINVAL) + goto out; + } + + cFYI(1, "calling SetFileInfo since SetPathInfo for times not supported " + "by this server"); + rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, + SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, + &netfid, &oplock, NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (rc != 0) { + if (rc == -EIO) + rc = -EINVAL; + goto out; + } + + netpid = current->tgid; + +set_via_filehandle: + rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid); + if (!rc) + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + + if (open_file == NULL) + CIFSSMBClose(xid, tcon, netfid); + else + cifsFileInfo_put(open_file); +out: + if (tlink != NULL) + cifs_put_tlink(tlink); + return rc; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -795,6 +873,7 @@ struct smb_version_operations smb1_operations = { .get_srv_inum = cifs_get_srv_inum, .set_path_size = CIFSSMBSetEOF, .set_file_size = CIFSSMBSetFileSize, + .set_file_info = smb_set_file_info, .build_path_to_root = cifs_build_path_to_root, .echo = CIFSSMBEcho, .mkdir = CIFSSMBMkDir, -- cgit v1.2.3 From 1feeaac753e0a9b3864740556b7840643642abdb Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:32 -0700 Subject: CIFS: Add set_file_info support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 22 ++++++++++++++++++++++ fs/cifs/smb2ops.c | 1 + fs/cifs/smb2pdu.c | 11 +++++++++++ fs/cifs/smb2proto.h | 5 +++++ 4 files changed, 39 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 290583092293..1bd6b0f0c10e 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -86,6 +86,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid, current->tgid, (__le64 *)data); break; + case SMB2_OP_SET_INFO: + tmprc = SMB2_set_info(xid, tcon, persistent_fid, volatile_fid, + (FILE_BASIC_INFO *)data); + break; default: cERROR(1, "Invalid command"); break; @@ -232,3 +236,21 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof, SMB2_OP_SET_EOF); } + +int +smb2_set_file_info(struct inode *inode, const char *full_path, + FILE_BASIC_INFO *buf, const unsigned int xid) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct tcon_link *tlink; + int rc; + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path, + FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, 0, buf, + SMB2_OP_SET_INFO); + cifs_put_tlink(tlink); + return rc; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e0daf3a4dcc2..52bc93125726 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -457,6 +457,7 @@ struct smb_version_operations smb21_operations = { .query_file_info = smb2_query_file_info, .set_path_size = smb2_set_path_size, .set_file_size = smb2_set_file_size, + .set_file_info = smb2_set_file_info, .build_path_to_root = smb2_build_path_to_root, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 74a8381400b1..a1314f99b4cc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1760,3 +1760,14 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size); } + +int +SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf) +{ + unsigned int size; + size = sizeof(FILE_BASIC_INFO); + return send_set_info(xid, tcon, persistent_fid, volatile_fid, + current->tgid, FILE_BASIC_INFORMATION, 1, + (void **)&buf, &size); +} diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 3d4866279530..277472433158 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -59,6 +59,8 @@ extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, __u64 size, struct cifs_sb_info *cifs_sb, bool set_alloc); +extern int smb2_set_file_info(struct inode *inode, const char *full_path, + FILE_BASIC_INFO *buf, const unsigned int xid); extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, @@ -124,5 +126,8 @@ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 pid, __le64 *eof); +extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + FILE_BASIC_INFO *buf); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 92fc65a74a2be1388d774f7dbf82c9adea1745cf Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:32 -0700 Subject: CIFS: Move readdir code to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 15 +++++ fs/cifs/cifsproto.h | 2 +- fs/cifs/file.c | 60 ++++++++++--------- fs/cifs/netmisc.c | 3 +- fs/cifs/readdir.c | 165 ++++++++++++++++++++++++++++------------------------ fs/cifs/smb1ops.c | 31 ++++++++++ 6 files changed, 173 insertions(+), 103 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index dff35830601f..9adf211ca95a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -177,6 +177,7 @@ struct cifs_fid; struct cifs_readdata; struct cifs_writedata; struct cifs_io_parms; +struct cifs_search_info; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -313,6 +314,20 @@ struct smb_version_operations { int (*sync_write)(const unsigned int, struct cifsFileInfo *, struct cifs_io_parms *, unsigned int *, struct kvec *, unsigned long); + /* open dir, start readdir */ + int (*query_dir_first)(const unsigned int, struct cifs_tcon *, + const char *, struct cifs_sb_info *, + struct cifs_fid *, __u16, + struct cifs_search_info *); + /* continue readdir */ + int (*query_dir_next)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *, + __u16, struct cifs_search_info *srch_inf); + /* close dir */ + int (*close_dir)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *); + /* calculate a size of SMB message */ + unsigned int (*calc_smb_size)(void *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 3d99fe9afccc..c7ad9a8cf82a 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -100,7 +100,7 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); -extern unsigned int smbCalcSize(struct smb_hdr *ptr); +extern unsigned int smbCalcSize(void *buf); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 39fff77e38d4..fb6b4413255b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -618,39 +618,47 @@ int cifs_closedir(struct inode *inode, struct file *file) int rc = 0; unsigned int xid; struct cifsFileInfo *cfile = file->private_data; - char *tmp; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + char *buf; cFYI(1, "Closedir inode = 0x%p", inode); + if (cfile == NULL) + return rc; + xid = get_xid(); + tcon = tlink_tcon(cfile->tlink); + server = tcon->ses->server; - if (cfile) { - struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + cFYI(1, "Freeing private data in close dir"); + spin_lock(&cifs_file_list_lock); + if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + cfile->invalidHandle = true; + spin_unlock(&cifs_file_list_lock); + if (server->ops->close_dir) + rc = server->ops->close_dir(xid, tcon, &cfile->fid); + else + rc = -ENOSYS; + cFYI(1, "Closing uncompleted readdir with rc %d", rc); + /* not much we can do if it fails anyway, ignore rc */ + rc = 0; + } else + spin_unlock(&cifs_file_list_lock); - cFYI(1, "Freeing private data in close dir"); - spin_lock(&cifs_file_list_lock); - if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { - cfile->invalidHandle = true; - spin_unlock(&cifs_file_list_lock); - rc = CIFSFindClose(xid, tcon, cfile->fid.netfid); - cFYI(1, "Closing uncompleted readdir with rc %d", rc); - /* not much we can do if it fails anyway, ignore rc */ - rc = 0; - } else - spin_unlock(&cifs_file_list_lock); - tmp = cfile->srch_inf.ntwrk_buf_start; - if (tmp) { - cFYI(1, "closedir free smb buf in srch struct"); - cfile->srch_inf.ntwrk_buf_start = NULL; - if (cfile->srch_inf.smallBuf) - cifs_small_buf_release(tmp); - else - cifs_buf_release(tmp); - } - cifs_put_tlink(cfile->tlink); - kfree(file->private_data); - file->private_data = NULL; + buf = cfile->srch_inf.ntwrk_buf_start; + if (buf) { + cFYI(1, "closedir free smb buf in srch struct"); + cfile->srch_inf.ntwrk_buf_start = NULL; + if (cfile->srch_inf.smallBuf) + cifs_small_buf_release(buf); + else + cifs_buf_release(buf); } + + cifs_put_tlink(cfile->tlink); + kfree(file->private_data); + file->private_data = NULL; /* BB can we lock the filestruct while this is going on? */ free_xid(xid); return rc; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 581c225f7f50..e7bab3be5cf9 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -913,8 +913,9 @@ map_smb_to_linux_error(char *buf, bool logErr) * portion, the number of word parameters and the data portion of the message */ unsigned int -smbCalcSize(struct smb_hdr *ptr) +smbCalcSize(void *buf) { + struct smb_hdr *ptr = (struct smb_hdr *)buf; return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + 2 /* size of the bcc field */ + get_bcc(ptr)); } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 9e76e3b3289b..b0f4a428398d 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -220,7 +220,8 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, } */ -static int initiate_cifs_search(const unsigned int xid, struct file *file) +static int +initiate_cifs_search(const unsigned int xid, struct file *file) { __u16 search_flags; int rc = 0; @@ -229,6 +230,7 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; if (file->private_data == NULL) { tlink = cifs_sb_tlink(cifs_sb); @@ -248,6 +250,13 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) tcon = tlink_tcon(cifsFile->tlink); } + server = tcon->ses->server; + + if (!server->ops->query_dir_first) { + rc = -ENOSYS; + goto error_exit; + } + cifsFile->invalidHandle = true; cifsFile->srch_inf.endOfSearch = false; @@ -278,10 +287,10 @@ ffirst_retry: if (backup_cred(cifs_sb)) search_flags |= CIFS_SEARCH_BACKUP_SEARCH; - rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls, - &cifsFile->fid.netfid, search_flags, &cifsFile->srch_inf, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); + rc = server->ops->query_dir_first(xid, tcon, full_path, cifs_sb, + &cifsFile->fid, search_flags, + &cifsFile->srch_inf); + if (rc == 0) cifsFile->invalidHandle = false; /* BB add following call to handle readdir on new NTFS symlink errors @@ -501,62 +510,67 @@ static int cifs_save_resume_key(const char *current_entry, return rc; } -/* find the corresponding entry in the search */ -/* Note that the SMB server returns search entries for . and .. which - complicates logic here if we choose to parse for them and we do not - assume that they are located in the findfirst return buffer.*/ -/* We start counting in the buffer with entry 2 and increment for every - entry (do not increment for . or .. entry) */ -static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, - struct file *file, char **ppCurrentEntry, int *num_to_ret) +/* + * Find the corresponding entry in the search. Note that the SMB server returns + * search entries for . and .. which complicates logic here if we choose to + * parse for them and we do not assume that they are located in the findfirst + * return buffer. We start counting in the buffer with entry 2 and increment for + * every entry (do not increment for . or .. entry). + */ +static int +find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, + struct file *file, char **current_entry, int *num_to_ret) { __u16 search_flags; int rc = 0; int pos_in_buf = 0; loff_t first_entry_in_buffer; loff_t index_to_find = file->f_pos; - struct cifsFileInfo *cifsFile = file->private_data; + struct cifsFileInfo *cfile = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct TCP_Server_Info *server = tcon->ses->server; /* check if index in the buffer */ - if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || - (num_to_ret == NULL)) + if (!server->ops->query_dir_first || !server->ops->query_dir_next) + return -ENOSYS; + + if ((cfile == NULL) || (current_entry == NULL) || (num_to_ret == NULL)) return -ENOENT; - *ppCurrentEntry = NULL; - first_entry_in_buffer = - cifsFile->srch_inf.index_of_last_entry - - cifsFile->srch_inf.entries_in_buffer; + *current_entry = NULL; + first_entry_in_buffer = cfile->srch_inf.index_of_last_entry - + cfile->srch_inf.entries_in_buffer; - /* if first entry in buf is zero then is first buffer - in search response data which means it is likely . and .. - will be in this buffer, although some servers do not return - . and .. for the root of a drive and for those we need - to start two entries earlier */ + /* + * If first entry in buf is zero then is first buffer + * in search response data which means it is likely . and .. + * will be in this buffer, although some servers do not return + * . and .. for the root of a drive and for those we need + * to start two entries earlier. + */ dump_cifs_file_struct(file, "In fce "); - if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) && - is_dir_changed(file)) || - (index_to_find < first_entry_in_buffer)) { + if (((index_to_find < cfile->srch_inf.index_of_last_entry) && + is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) { /* close and restart search */ cFYI(1, "search backing up - close and restart search"); spin_lock(&cifs_file_list_lock); - if (!cifsFile->srch_inf.endOfSearch && - !cifsFile->invalidHandle) { - cifsFile->invalidHandle = true; + if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - CIFSFindClose(xid, pTcon, cifsFile->fid.netfid); + if (server->ops->close) + server->ops->close(xid, tcon, &cfile->fid); } else spin_unlock(&cifs_file_list_lock); - if (cifsFile->srch_inf.ntwrk_buf_start) { + if (cfile->srch_inf.ntwrk_buf_start) { cFYI(1, "freeing SMB ff cache buf on search rewind"); - if (cifsFile->srch_inf.smallBuf) - cifs_small_buf_release(cifsFile->srch_inf. + if (cfile->srch_inf.smallBuf) + cifs_small_buf_release(cfile->srch_inf. ntwrk_buf_start); else - cifs_buf_release(cifsFile->srch_inf. + cifs_buf_release(cfile->srch_inf. ntwrk_buf_start); - cifsFile->srch_inf.ntwrk_buf_start = NULL; + cfile->srch_inf.ntwrk_buf_start = NULL; } rc = initiate_cifs_search(xid, file); if (rc) { @@ -565,65 +579,64 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, return rc; } /* FindFirst/Next set last_entry to NULL on malformed reply */ - if (cifsFile->srch_inf.last_entry) - cifs_save_resume_key(cifsFile->srch_inf.last_entry, - cifsFile); + if (cfile->srch_inf.last_entry) + cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; if (backup_cred(cifs_sb)) search_flags |= CIFS_SEARCH_BACKUP_SEARCH; - while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && - (rc == 0) && !cifsFile->srch_inf.endOfSearch) { + while ((index_to_find >= cfile->srch_inf.index_of_last_entry) && + (rc == 0) && !cfile->srch_inf.endOfSearch) { cFYI(1, "calling findnext2"); - rc = CIFSFindNext(xid, pTcon, cifsFile->fid.netfid, - search_flags, &cifsFile->srch_inf); + rc = server->ops->query_dir_next(xid, tcon, &cfile->fid, + search_flags, + &cfile->srch_inf); /* FindFirst/Next set last_entry to NULL on malformed reply */ - if (cifsFile->srch_inf.last_entry) - cifs_save_resume_key(cifsFile->srch_inf.last_entry, - cifsFile); + if (cfile->srch_inf.last_entry) + cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); if (rc) return -ENOENT; } - if (index_to_find < cifsFile->srch_inf.index_of_last_entry) { + if (index_to_find < cfile->srch_inf.index_of_last_entry) { /* we found the buffer that contains the entry */ /* scan and find it */ int i; - char *current_entry; - char *end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + - smbCalcSize((struct smb_hdr *) - cifsFile->srch_inf.ntwrk_buf_start); - - current_entry = cifsFile->srch_inf.srch_entries_start; - first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry - - cifsFile->srch_inf.entries_in_buffer; + char *cur_ent; + char *end_of_smb = cfile->srch_inf.ntwrk_buf_start + + server->ops->calc_smb_size( + cfile->srch_inf.ntwrk_buf_start); + + cur_ent = cfile->srch_inf.srch_entries_start; + first_entry_in_buffer = cfile->srch_inf.index_of_last_entry + - cfile->srch_inf.entries_in_buffer; pos_in_buf = index_to_find - first_entry_in_buffer; cFYI(1, "found entry - pos_in_buf %d", pos_in_buf); - for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { + for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) { /* go entry by entry figuring out which is first */ - current_entry = nxt_dir_entry(current_entry, end_of_smb, - cifsFile->srch_inf.info_level); + cur_ent = nxt_dir_entry(cur_ent, end_of_smb, + cfile->srch_inf.info_level); } - if ((current_entry == NULL) && (i < pos_in_buf)) { + if ((cur_ent == NULL) && (i < pos_in_buf)) { /* BB fixme - check if we should flag this error */ cERROR(1, "reached end of buf searching for pos in buf" - " %d index to find %lld rc %d", - pos_in_buf, index_to_find, rc); + " %d index to find %lld rc %d", pos_in_buf, + index_to_find, rc); } rc = 0; - *ppCurrentEntry = current_entry; + *current_entry = cur_ent; } else { cFYI(1, "index not in buffer - could not findnext into it"); return 0; } - if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { + if (pos_in_buf >= cfile->srch_inf.entries_in_buffer) { cFYI(1, "can not return entries pos_in_buf beyond last"); *num_to_ret = 0; } else - *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf; + *num_to_ret = cfile->srch_inf.entries_in_buffer - pos_in_buf; return rc; } @@ -723,7 +736,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) int rc = 0; unsigned int xid; int i; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; struct cifsFileInfo *cifsFile = NULL; char *current_entry; int num_to_fill = 0; @@ -781,12 +794,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } } /* else { cifsFile->invalidHandle = true; - CIFSFindClose(xid, pTcon, cifsFile->fid.netfid); + tcon->ses->server->close(xid, tcon, &cifsFile->fid); } */ - pTcon = tlink_tcon(cifsFile->tlink); - rc = find_cifs_entry(xid, pTcon, file, - ¤t_entry, &num_to_fill); + tcon = tlink_tcon(cifsFile->tlink); + rc = find_cifs_entry(xid, tcon, file, ¤t_entry, + &num_to_fill); if (rc) { cFYI(1, "fce error %d", rc); goto rddir2_exit; @@ -798,7 +811,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } cFYI(1, "loop through %d times filling dir for net buf %p", num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); - max_len = smbCalcSize((struct smb_hdr *) + max_len = tcon->ses->server->ops->calc_smb_size( cifsFile->srch_inf.ntwrk_buf_start); end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; @@ -815,10 +828,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) num_to_fill, i); break; } - /* if buggy server returns . and .. late do - we want to check for that here? */ - rc = cifs_filldir(current_entry, file, - filldir, direntry, tmp_buf, max_len); + /* + * if buggy server returns . and .. late do we want to + * check for that here? + */ + rc = cifs_filldir(current_entry, file, filldir, + direntry, tmp_buf, max_len); if (rc == -EOVERFLOW) { rc = 0; break; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index ed311968437a..068d609bd02a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -836,6 +836,33 @@ out: return rc; } +static int +cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, + const char *path, struct cifs_sb_info *cifs_sb, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + return CIFSFindFirst(xid, tcon, path, cifs_sb->local_nls, + &fid->netfid, search_flags, srch_inf, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); +} + +static int +cifs_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + return CIFSFindNext(xid, tcon, fid->netfid, search_flags, srch_inf); +} + +static int +cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return CIFSFindClose(xid, tcon, fid->netfid); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -891,6 +918,10 @@ struct smb_version_operations smb1_operations = { .async_writev = cifs_async_writev, .sync_read = cifs_sync_read, .sync_write = cifs_sync_write, + .query_dir_first = cifs_query_dir_first, + .query_dir_next = cifs_query_dir_next, + .close_dir = cifs_close_dir, + .calc_smb_size = smbCalcSize, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From d324f08d6a87149597817f4496ef0f7ac185e8da Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:33 -0700 Subject: CIFS: Add readdir support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 8 ++- fs/cifs/smb2ops.c | 57 ++++++++++++++++++ fs/cifs/smb2pdu.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 28 +++++++++ fs/cifs/smb2proto.h | 5 +- 5 files changed, 264 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 9275883c8530..78225f517a60 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -248,6 +248,11 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) *len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength); break; case SMB2_QUERY_DIRECTORY: + *off = le16_to_cpu( + ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset); + *len = le32_to_cpu( + ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); + break; case SMB2_IOCTL: case SMB2_CHANGE_NOTIFY: default: @@ -290,8 +295,9 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) * portion, the number of word parameters and the data portion of the message. */ unsigned int -smb2_calc_size(struct smb2_hdr *hdr) +smb2_calc_size(void *buf) { + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; int offset; /* the offset from the beginning of SMB to data area */ int data_length; /* the length of the variable length data area */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 52bc93125726..0fa086c1b5dc 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -424,6 +424,59 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, &eof); } +static int +smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, + const char *path, struct cifs_sb_info *cifs_sb, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + __le16 *utf16_path; + int rc; + __u64 persistent_fid, volatile_fid; + + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, + FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0, + NULL); + kfree(utf16_path); + if (rc) { + cERROR(1, "open dir failed"); + return rc; + } + + srch_inf->entries_in_buffer = 0; + srch_inf->index_of_last_entry = 0; + fid->persistent_fid = persistent_fid; + fid->volatile_fid = volatile_fid; + + rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0, + srch_inf); + if (rc) { + cERROR(1, "query directory failed"); + SMB2_close(xid, tcon, persistent_fid, volatile_fid); + } + return rc; +} + +static int +smb2_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + return SMB2_query_directory(xid, tcon, fid->persistent_fid, + fid->volatile_fid, 0, srch_inf); +} + +static int +smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -473,6 +526,10 @@ struct smb_version_operations smb21_operations = { .async_writev = smb2_async_writev, .sync_read = smb2_sync_read, .sync_write = smb2_sync_write, + .query_dir_first = smb2_query_dir_first, + .query_dir_next = smb2_query_dir_next, + .close_dir = smb2_close_dir, + .calc_smb_size = smb2_calc_size, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a1314f99b4cc..21b3a652e192 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -45,6 +45,7 @@ #include "ntlmssp.h" #include "smb2status.h" #include "smb2glob.h" +#include "cifspdu.h" /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -1603,6 +1604,173 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, return rc; } +static unsigned int +num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size) +{ + int len; + unsigned int entrycount = 0; + unsigned int next_offset = 0; + FILE_DIRECTORY_INFO *entryptr; + + if (bufstart == NULL) + return 0; + + entryptr = (FILE_DIRECTORY_INFO *)bufstart; + + while (1) { + entryptr = (FILE_DIRECTORY_INFO *) + ((char *)entryptr + next_offset); + + if ((char *)entryptr + size > end_of_buf) { + cERROR(1, "malformed search entry would overflow"); + break; + } + + len = le32_to_cpu(entryptr->FileNameLength); + if ((char *)entryptr + len + size > end_of_buf) { + cERROR(1, "directory entry name would overflow frame " + "end of buf %p", end_of_buf); + break; + } + + *lastentry = (char *)entryptr; + entrycount++; + + next_offset = le32_to_cpu(entryptr->NextEntryOffset); + if (!next_offset) + break; + } + + return entrycount; +} + +/* + * Readdir/FindFirst + */ +int +SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, int index, + struct cifs_search_info *srch_inf) +{ + struct smb2_query_directory_req *req; + struct smb2_query_directory_rsp *rsp = NULL; + struct kvec iov[2]; + int rc = 0; + int len; + int resp_buftype; + unsigned char *bufptr; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + __le16 asteriks = cpu_to_le16('*'); + char *end_of_smb; + unsigned int output_size = CIFSMaxBufSize; + size_t info_buf_size; + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req); + if (rc) + return rc; + + switch (srch_inf->info_level) { + case SMB_FIND_FILE_DIRECTORY_INFO: + req->FileInformationClass = FILE_DIRECTORY_INFORMATION; + info_buf_size = sizeof(FILE_DIRECTORY_INFO) - 1; + break; + case SMB_FIND_FILE_ID_FULL_DIR_INFO: + req->FileInformationClass = FILEID_FULL_DIRECTORY_INFORMATION; + info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1; + break; + default: + cERROR(1, "info level %u isn't supported", + srch_inf->info_level); + rc = -EINVAL; + goto qdir_exit; + } + + req->FileIndex = cpu_to_le32(index); + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + len = 0x2; + bufptr = req->Buffer; + memcpy(bufptr, &asteriks, len); + + req->FileNameOffset = + cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4); + req->FileNameLength = cpu_to_le16(len); + /* + * BB could be 30 bytes or so longer if we used SMB2 specific + * buffer lengths, but this is safe and close enough. + */ + output_size = min_t(unsigned int, output_size, server->maxBuf); + output_size = min_t(unsigned int, output_size, 2 << 15); + req->OutputBufferLength = cpu_to_le32(output_size); + + iov[0].iov_base = (char *)req; + /* 4 for RFC1001 length and 1 for Buffer */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + + iov[1].iov_base = (char *)(req->Buffer); + iov[1].iov_len = len; + + inc_rfc1001_len(req, len - 1 /* Buffer */); + + rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0); + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); + goto qdir_exit; + } + rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; + + rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, + info_buf_size); + if (rc) + goto qdir_exit; + + srch_inf->unicode = true; + + if (srch_inf->ntwrk_buf_start) { + if (srch_inf->smallBuf) + cifs_small_buf_release(srch_inf->ntwrk_buf_start); + else + cifs_buf_release(srch_inf->ntwrk_buf_start); + } + srch_inf->ntwrk_buf_start = (char *)rsp; + srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ + + (char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset); + /* 4 for rfc1002 length field */ + end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr; + srch_inf->entries_in_buffer = + num_entries(srch_inf->srch_entries_start, end_of_smb, + &srch_inf->last_entry, info_buf_size); + srch_inf->index_of_last_entry += srch_inf->entries_in_buffer; + cFYI(1, "num entries %d last_index %lld srch start %p srch end %p", + srch_inf->entries_in_buffer, srch_inf->index_of_last_entry, + srch_inf->srch_entries_start, srch_inf->last_entry); + if (resp_buftype == CIFS_LARGE_BUFFER) + srch_inf->smallBuf = false; + else if (resp_buftype == CIFS_SMALL_BUFFER) + srch_inf->smallBuf = true; + else + cERROR(1, "illegal search buffer type"); + + if (rsp->hdr.Status == STATUS_NO_MORE_FILES) + srch_inf->endOfSearch = 1; + else + srch_inf->endOfSearch = 0; + + return rc; + +qdir_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +} + static int send_set_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index d775941f552a..e6ddd1f79706 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -538,6 +538,34 @@ struct smb2_echo_rsp { __u16 Reserved; } __packed; +/* search (query_directory) Flags field */ +#define SMB2_RESTART_SCANS 0x01 +#define SMB2_RETURN_SINGLE_ENTRY 0x02 +#define SMB2_INDEX_SPECIFIED 0x04 +#define SMB2_REOPEN 0x10 + +struct smb2_query_directory_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 33 */ + __u8 FileInformationClass; + __u8 Flags; + __le32 FileIndex; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le16 FileNameOffset; + __le16 FileNameLength; + __le32 OutputBufferLength; + __u8 Buffer[1]; +} __packed; + +struct smb2_query_directory_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 OutputBufferOffset; + __le32 OutputBufferLength; + __u8 Buffer[1]; +} __packed; + /* Possible InfoType values */ #define SMB2_O_INFO_FILE 0x01 #define SMB2_O_INFO_FILESYSTEM 0x02 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 277472433158..0d29db222a5a 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -34,7 +34,7 @@ struct statfs; */ extern int map_smb2_to_linux_error(char *buf, bool log_err); extern int smb2_check_message(char *buf, unsigned int length); -extern unsigned int smb2_calc_size(struct smb2_hdr *hdr); +extern unsigned int smb2_calc_size(void *buf); extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); extern __le16 *cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb); @@ -117,6 +117,9 @@ extern int smb2_async_writev(struct cifs_writedata *wdata); extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, struct kvec *iov, int n_vec); extern int SMB2_echo(struct TCP_Server_Info *server); +extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, int index, + struct cifs_search_info *srch_inf); extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, __le16 *target_file); -- cgit v1.2.3 From 2e44b2887882134abf353b28867b82645e9f0856 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:33 -0700 Subject: CIFS: Process oplocks for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/connect.c | 4 ++++ fs/cifs/smb2file.c | 24 ++++++++++++++++++++++-- fs/cifs/smb2inode.c | 3 ++- fs/cifs/smb2ops.c | 34 ++++++++++++++++++++++++++++++---- fs/cifs/smb2pdu.c | 10 ++++++---- fs/cifs/smb2proto.h | 3 ++- 7 files changed, 68 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 9adf211ca95a..3eb59ed6904a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -328,6 +328,8 @@ struct smb_version_operations { struct cifs_fid *); /* calculate a size of SMB message */ unsigned int (*calc_smb_size)(void *); + /* check for STATUS_PENDING and process it in a positive case */ + bool (*is_status_pending)(char *, struct TCP_Server_Info *, int); }; struct smb_version_values { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c31b30b572e0..549409b1c776 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -819,6 +819,10 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dump_mem("Bad SMB: ", buf, min_t(unsigned int, server->total_read, 48)); + if (server->ops->is_status_pending && + server->ops->is_status_pending(buf, server, length)) + return -1; + if (!mid) return length; diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index a7618dfb7712..5ff25e025215 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -34,6 +34,26 @@ #include "fscache.h" #include "smb2proto.h" +void +smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) +{ + oplock &= 0xFF; + if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { + cinode->clientCanCacheAll = true; + cinode->clientCanCacheRead = true; + cFYI(1, "Exclusive Oplock granted on inode %p", + &cinode->vfs_inode); + } else if (oplock == SMB2_OPLOCK_LEVEL_II) { + cinode->clientCanCacheAll = false; + cinode->clientCanCacheRead = true; + cFYI(1, "Level II Oplock granted on inode %p", + &cinode->vfs_inode); + } else { + cinode->clientCanCacheAll = false; + cinode->clientCanCacheRead = false; + } +} + int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, int disposition, int desired_access, int create_options, @@ -58,10 +78,11 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, } desired_access |= FILE_READ_ATTRIBUTES; + *oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid, &fid->volatile_fid, desired_access, disposition, - 0, 0, smb2_data); + 0, 0, (__u8 *)oplock, smb2_data); if (rc) goto out; @@ -79,7 +100,6 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, } out: - *oplock = 0; kfree(smb2_data); kfree(smb2_path); return rc; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1bd6b0f0c10e..706482452df4 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -47,6 +47,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, int rc, tmprc = 0; u64 persistent_fid, volatile_fid; __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); if (!utf16_path) @@ -54,7 +55,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, desired_access, create_disposition, file_attributes, - create_options, NULL); + create_options, &oplock, NULL); if (rc) { kfree(utf16_path); return rc; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0fa086c1b5dc..2d88c90ded0c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -23,6 +23,7 @@ #include "smb2proto.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "smb2status.h" static int change_conf(struct TCP_Server_Info *server) @@ -207,13 +208,14 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, int rc; __u64 persistent_fid, volatile_fid; __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); if (!utf16_path) return -ENOMEM; rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, NULL); + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); if (rc) { kfree(utf16_path); return rc; @@ -358,10 +360,10 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) static void smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) { - /* struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); */ + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; - /* cifs_set_oplock_level(cinode, oplock); */ + smb2_set_oplock_level(cinode, oplock); /* cinode->can_cache_brlcks = cinode->clientCanCacheAll; */ } @@ -432,6 +434,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, { __le16 *utf16_path; int rc; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; __u64 persistent_fid, volatile_fid; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); @@ -440,7 +443,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0, - NULL); + &oplock, NULL); kfree(utf16_path); if (rc) { cERROR(1, "open dir failed"); @@ -477,6 +480,28 @@ smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } +/* +* If we negotiate SMB2 protocol and get STATUS_PENDING - update +* the number of credits and return true. Otherwise - return false. +*/ +static bool +smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length) +{ + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; + + if (le32_to_cpu(hdr->Status) != STATUS_PENDING) + return false; + + if (!length) { + spin_lock(&server->req_lock); + server->credits += le16_to_cpu(hdr->CreditRequest); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + } + + return true; +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -530,6 +555,7 @@ struct smb_version_operations smb21_operations = { .query_dir_next = smb2_query_dir_next, .close_dir = smb2_close_dir, .calc_smb_size = smb2_calc_size, + .is_status_pending = smb2_is_status_pending, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 21b3a652e192..e97c256c8a42 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -872,7 +872,7 @@ int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, __u32 create_disposition, __u32 file_attributes, __u32 create_options, - struct smb2_file_all_info *buf) + __u8 *oplock, struct smb2_file_all_info *buf) { struct smb2_create_req *req; struct smb2_create_rsp *rsp; @@ -895,9 +895,9 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, if (rc) return rc; - /* if (server->oplocks) - req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH; - else */ + if (server->oplocks) + req->RequestedOplockLevel = *oplock; + else req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; req->ImpersonationLevel = IL_IMPERSONATION; req->DesiredAccess = cpu_to_le32(desired_access); @@ -954,6 +954,8 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, buf->NumberOfLinks = cpu_to_le32(1); buf->DeletePending = 0; } + + *oplock = rsp->OplockLevel; creat_exit: free_rsp_buf(resp_buftype, rsp); return rc; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 0d29db222a5a..d18339fcc8a2 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -82,6 +82,7 @@ extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, int desired_access, int create_options, struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb); +extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); /* * SMB2 Worker functions - most of protocol specific implementation details @@ -99,7 +100,7 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, __u32 create_disposition, __u32 file_attributes, __u32 create_options, - struct smb2_file_all_info *buf); + __u8 *oplock, struct smb2_file_all_info *buf); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, -- cgit v1.2.3 From 95a3f2f377735ed13e42d3b8039aa1d73af2c90e Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:33 -0700 Subject: CIFS: Move oplock break to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 4 ++++ fs/cifs/file.c | 7 +++---- fs/cifs/smb1ops.c | 10 ++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 3eb59ed6904a..a95c56dc7058 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -178,6 +178,7 @@ struct cifs_readdata; struct cifs_writedata; struct cifs_io_parms; struct cifs_search_info; +struct cifsInodeInfo; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -330,6 +331,9 @@ struct smb_version_operations { unsigned int (*calc_smb_size)(void *); /* check for STATUS_PENDING and process it in a positive case */ bool (*is_status_pending)(char *, struct TCP_Server_Info *, int); + /* send oplock break response */ + int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *, + struct cifsInodeInfo *); }; struct smb_version_values { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index fb6b4413255b..2418618118b6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3404,6 +3404,7 @@ void cifs_oplock_break(struct work_struct *work) oplock_break); struct inode *inode = cfile->dentry->d_inode; struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; if (inode && S_ISREG(inode->i_mode)) { @@ -3431,10 +3432,8 @@ void cifs_oplock_break(struct work_struct *work) * disconnected since oplock already released by the server */ if (!cfile->oplock_break_cancelled) { - rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->fid.netfid, - current->tgid, 0, 0, 0, 0, - LOCKING_ANDX_OPLOCK_RELEASE, false, - cinode->clientCanCacheRead ? 1 : 0); + rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, + cinode); cFYI(1, "Oplock release rc = %d", rc); } } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 068d609bd02a..f55b2e3476e8 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -863,6 +863,15 @@ cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon, return CIFSFindClose(xid, tcon, fid->netfid); } +static int +cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, + struct cifsInodeInfo *cinode) +{ + return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0, + LOCKING_ANDX_OPLOCK_RELEASE, false, + cinode->clientCanCacheRead ? 1 : 0); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -922,6 +931,7 @@ struct smb_version_operations smb1_operations = { .query_dir_next = cifs_query_dir_next, .close_dir = cifs_close_dir, .calc_smb_size = smbCalcSize, + .oplock_response = cifs_oplock_response, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 983c88a497914d60c91f431b05a8449ddda19167 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:33 -0700 Subject: CIFS: Add oplock break support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/cifs/smb2ops.c | 22 ++++++++++++++++ fs/cifs/smb2pdu.c | 30 +++++++++++++++++++++ fs/cifs/smb2pdu.h | 10 +++++++ fs/cifs/smb2proto.h | 5 ++++ 5 files changed, 141 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 78225f517a60..01479a3fee8d 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -142,8 +142,8 @@ smb2_check_message(char *buf, unsigned int length) } if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { - if (hdr->Status == 0 || - pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2) { + if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 || + pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) { /* error packets have 9 byte structure size */ cERROR(1, "Illegal response size %u for command %d", le16_to_cpu(pdu->StructureSize2), command); @@ -162,6 +162,9 @@ smb2_check_message(char *buf, unsigned int length) if (4 + len != clc_len) { cFYI(1, "Calculated size %u length %u mismatch mid %llu", clc_len, 4 + len, mid); + /* Windows 7 server returns 24 bytes more */ + if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) + return 0; /* server can return one byte more */ if (clc_len == 4 + len + 1) return 0; @@ -356,3 +359,72 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) CIFS_MOUNT_MAP_SPECIAL_CHR); return to; } + +bool +smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) +{ + struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; + struct list_head *tmp, *tmp1, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct cifsInodeInfo *cinode; + struct cifsFileInfo *cfile; + + cFYI(1, "Checking for oplock break"); + + if (rsp->hdr.Command != SMB2_OPLOCK_BREAK) + return false; + + if (le16_to_cpu(rsp->StructureSize) != + smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { + return false; + } + + cFYI(1, "oplock level 0x%d", rsp->OplockLevel); + + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + + cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); + spin_lock(&cifs_file_list_lock); + list_for_each(tmp2, &tcon->openFileList) { + cfile = list_entry(tmp2, struct cifsFileInfo, + tlist); + if (rsp->PersistentFid != + cfile->fid.persistent_fid || + rsp->VolatileFid != + cfile->fid.volatile_fid) + continue; + + cFYI(1, "file id match, oplock break"); + cinode = CIFS_I(cfile->dentry->d_inode); + + if (!cinode->clientCanCacheAll && + rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE) + cfile->oplock_break_cancelled = true; + else + cfile->oplock_break_cancelled = false; + + smb2_set_oplock_level(cinode, + rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0); + + queue_work(cifsiod_wq, &cfile->oplock_break); + + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + cFYI(1, "No matching file for oplock break"); + return true; + } + } + spin_unlock(&cifs_tcp_ses_lock); + cFYI(1, "Can not process oplock break for non-existent connection"); + return false; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 2d88c90ded0c..8c72e0768d12 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -65,6 +65,17 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, server->in_flight--; if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP) rc = change_conf(server); + /* + * Sometimes server returns 0 credits on oplock break ack - we need to + * rebalance credits in this case. + */ + else if (server->in_flight > 0 && server->oplock_credits == 0 && + server->oplocks) { + if (server->credits > 1) { + server->credits--; + server->oplock_credits++; + } + } spin_unlock(&server->req_lock); wake_up(&server->request_q); if (rc) @@ -502,6 +513,15 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length) return true; } +static int +smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, + struct cifsInodeInfo *cinode) +{ + return SMB2_oplock_break(0, tcon, fid->persistent_fid, + fid->volatile_fid, + cinode->clientCanCacheRead ? 1 : 0); +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -519,6 +539,7 @@ struct smb_version_operations smb21_operations = { .dump_detail = smb2_dump_detail, .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, + .is_oplock_break = smb2_is_valid_oplock_break, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .negotiate_wsize = smb2_negotiate_wsize, @@ -556,6 +577,7 @@ struct smb_version_operations smb21_operations = { .close_dir = smb2_close_dir, .calc_smb_size = smb2_calc_size, .is_status_pending = smb2_is_status_pending, + .oplock_response = smb2_oplock_response, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e97c256c8a42..566d86b0ad96 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1941,3 +1941,33 @@ SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, current->tgid, FILE_BASIC_INFORMATION, 1, (void **)&buf, &size); } + +int +SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, + const u64 persistent_fid, const u64 volatile_fid, + __u8 oplock_level) +{ + int rc; + struct smb2_oplock_break *req = NULL; + + cFYI(1, "SMB2_oplock_break"); + rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); + + if (rc) + return rc; + + req->VolatileFid = volatile_fid; + req->PersistentFid = persistent_fid; + req->OplockLevel = oplock_level; + req->hdr.CreditRequest = cpu_to_le16(1); + + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP); + /* SMB2 buffer freed by function above */ + + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); + cFYI(1, "Send error in Oplock Break = %d", rc); + } + + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e6ddd1f79706..8a0745764297 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -615,6 +615,16 @@ struct smb2_set_info_rsp { __le16 StructureSize; /* Must be 2 */ } __packed; +struct smb2_oplock_break { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 24 */ + __u8 OplockLevel; + __u8 Reserved; + __le32 Reserved2; + __u64 PersistentFid; + __u64 VolatileFid; +} __packed; + /* * PDU infolevel structure definitions * BB consider moving to a different header diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index d18339fcc8a2..de554b716fdf 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -49,6 +49,8 @@ extern int smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, unsigned int nvec, struct mid_q_entry **ret_mid); extern void smb2_echo_request(struct work_struct *work); +extern bool smb2_is_valid_oplock_break(char *buffer, + struct TCP_Server_Info *srv); extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src); @@ -133,5 +135,8 @@ extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf); +extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, + const u64 persistent_fid, const u64 volatile_fid, + const __u8 oplock_level); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 76ec5e33846de386f44826f145cd725b92c23630 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:33 -0700 Subject: CIFS: Move statfs to ops struct Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 29 ++++------------------------- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/smb1ops.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2829f374dbf7..4dda4890d776 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -51,7 +51,6 @@ #ifdef CONFIG_CIFS_SMB2 #include "smb2pdu.h" #endif -#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ int cifsFYI = 0; int cifsERROR = 1; @@ -164,13 +163,12 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - int rc = -EOPNOTSUPP; + struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; + int rc = 0; xid = get_xid(); - buf->f_type = CIFS_MAGIC_NUMBER; - /* * PATH_MAX may be too long - it would presumably be total path, * but note that some servers (includinng Samba 3) have a shorter @@ -182,27 +180,8 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = 0; /* undefined */ buf->f_ffree = 0; /* unlimited */ - /* - * We could add a second check for a QFS Unix capability bit - */ - if ((tcon->ses->capabilities & CAP_UNIX) && - (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) - rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); - - /* - * Only need to call the old QFSInfo if failed on newer one, - * e.g. by OS/2. - **/ - if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) - rc = CIFSSMBQFSInfo(xid, tcon, buf); - - /* - * Some old Windows servers also do not support level 103, retry with - * older level one if old server failed the previous call or we - * bypassed it because we detected that this was an older LANMAN sess - */ - if (rc) - rc = SMBOldQFSInfo(xid, tcon, buf); + if (server->ops->queryfs) + rc = server->ops->queryfs(xid, tcon, buf); free_xid(xid); return 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a95c56dc7058..3c007fe641f9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -32,6 +32,8 @@ #include "smb2pdu.h" #endif +#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ + /* * The sizes of various internal tables and strings */ @@ -334,6 +336,9 @@ struct smb_version_operations { /* send oplock break response */ int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *, struct cifsInodeInfo *); + /* query remote filesystem */ + int (*queryfs)(const unsigned int, struct cifs_tcon *, + struct kstatfs *); }; struct smb_version_values { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index f55b2e3476e8..f6c7a1c9a1b6 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -18,6 +18,7 @@ */ #include +#include #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" @@ -872,6 +873,38 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, cinode->clientCanCacheRead ? 1 : 0); } +static int +cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *buf) +{ + int rc = -EOPNOTSUPP; + + buf->f_type = CIFS_MAGIC_NUMBER; + + /* + * We could add a second check for a QFS Unix capability bit + */ + if ((tcon->ses->capabilities & CAP_UNIX) && + (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) + rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); + + /* + * Only need to call the old QFSInfo if failed on newer one, + * e.g. by OS/2. + **/ + if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) + rc = CIFSSMBQFSInfo(xid, tcon, buf); + + /* + * Some old Windows servers also do not support level 103, retry with + * older level one if old server failed the previous call or we + * bypassed it because we detected that this was an older LANMAN sess + */ + if (rc) + rc = SMBOldQFSInfo(xid, tcon, buf); + return rc; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -932,6 +965,7 @@ struct smb_version_operations smb1_operations = { .close_dir = cifs_close_dir, .calc_smb_size = smbCalcSize, .oplock_response = cifs_oplock_response, + .queryfs = cifs_queryfs, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 6fc05c25ca35e65ee1759dd803f23576a268f5ec Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 18 Sep 2012 16:20:34 -0700 Subject: CIFS: Add statfs support for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2glob.h | 2 ++ fs/cifs/smb2ops.c | 22 +++++++++++++++ fs/cifs/smb2pdu.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 19 +++++++++++++ fs/cifs/smb2proto.h | 3 ++ 5 files changed, 127 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 05d429b1c37e..7c0e2143e775 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -23,6 +23,8 @@ #ifndef _SMB2_GLOB_H #define _SMB2_GLOB_H +#define SMB2_MAGIC_NUMBER 0xFE534D42 + /* ***************************************************************** * Constants go here diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 8c72e0768d12..3a8c68258026 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -18,12 +18,14 @@ */ #include +#include #include "cifsglob.h" #include "smb2pdu.h" #include "smb2proto.h" #include "cifsproto.h" #include "cifs_debug.h" #include "smb2status.h" +#include "smb2glob.h" static int change_conf(struct TCP_Server_Info *server) @@ -522,6 +524,25 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, cinode->clientCanCacheRead ? 1 : 0); } +static int +smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *buf) +{ + int rc; + u64 persistent_fid, volatile_fid; + __le16 srch_path = 0; /* Null - open root of share */ + u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + + rc = SMB2_open(xid, tcon, &srch_path, &persistent_fid, &volatile_fid, + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); + if (rc) + return rc; + buf->f_type = SMB2_MAGIC_NUMBER; + rc = SMB2_QFS_info(xid, tcon, persistent_fid, volatile_fid, buf); + SMB2_close(xid, tcon, persistent_fid, volatile_fid); + return rc; +} + struct smb_version_operations smb21_operations = { .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -578,6 +599,7 @@ struct smb_version_operations smb21_operations = { .calc_smb_size = smb2_calc_size, .is_status_pending = smb2_is_status_pending, .oplock_response = smb2_oplock_response, + .queryfs = smb2_queryfs, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 566d86b0ad96..994c184ac9a9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1971,3 +1971,84 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, return rc; } + +static void +copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, + struct kstatfs *kst) +{ + kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * + le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); + kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); + kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); + kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + return; +} + +static int +build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, + int outbuf_len, u64 persistent_fid, u64 volatile_fid) +{ + int rc; + struct smb2_query_info_req *req; + + cFYI(1, "Query FSInfo level %d", level); + + if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) + return -EIO; + + rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req); + if (rc) + return rc; + + req->InfoType = SMB2_O_INFO_FILESYSTEM; + req->FileInfoClass = level; + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + /* 4 for rfc1002 length field and 1 for pad */ + req->InputBufferOffset = + cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); + req->OutputBufferLength = cpu_to_le32( + outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4); + + iov->iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov->iov_len = get_rfc1002_length(req) + 4; + return 0; +} + +int +SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) +{ + struct smb2_query_info_rsp *rsp = NULL; + struct kvec iov; + int rc = 0; + int resp_buftype; + struct cifs_ses *ses = tcon->ses; + struct smb2_fs_full_size_info *info = NULL; + + rc = build_qfs_info_req(&iov, tcon, FS_FULL_SIZE_INFORMATION, + sizeof(struct smb2_fs_full_size_info), + persistent_fid, volatile_fid); + if (rc) + return rc; + + rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0); + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); + goto qinf_exit; + } + rsp = (struct smb2_query_info_rsp *)iov.iov_base; + + info = (struct smb2_fs_full_size_info *)(4 /* RFC1001 len */ + + le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr); + rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, + sizeof(struct smb2_fs_full_size_info)); + if (!rc) + copy_fs_info_to_kstatfs(info, fsdata); + +qinf_exit: + free_rsp_buf(resp_buftype, iov.iov_base); + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 8a0745764297..3c8e99ea88ad 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -630,6 +630,25 @@ struct smb2_oplock_break { * BB consider moving to a different header */ +/* File System Information Classes */ +#define FS_VOLUME_INFORMATION 1 /* Query */ +#define FS_LABEL_INFORMATION 2 /* Set */ +#define FS_SIZE_INFORMATION 3 /* Query */ +#define FS_DEVICE_INFORMATION 4 /* Query */ +#define FS_ATTRIBUTE_INFORMATION 5 /* Query */ +#define FS_CONTROL_INFORMATION 6 /* Query, Set */ +#define FS_FULL_SIZE_INFORMATION 7 /* Query */ +#define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */ +#define FS_DRIVER_PATH_INFORMATION 9 /* Query */ + +struct smb2_fs_full_size_info { + __le64 TotalAllocationUnits; + __le64 CallerAvailableAllocationUnits; + __le64 ActualAvailableAllocationUnits; + __le32 SectorsPerAllocationUnit; + __le32 BytesPerSector; +} __packed; + /* partial list of QUERY INFO levels */ #define FILE_DIRECTORY_INFORMATION 1 #define FILE_FULL_DIRECTORY_INFORMATION 2 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index de554b716fdf..a73a963af8f4 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -138,5 +138,8 @@ extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, const u64 persistent_fid, const u64 volatile_fid, const __u8 oplock_level); +extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_file_id, u64 volatile_file_id, + struct kstatfs *FSData); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From bf5ea0e2f29b00d4fe5f203d8e59120f797ce451 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:34 -0700 Subject: cifs: change signing routines to deal with smb_rqst structs We need a way to represent a call to be sent on the wire that does not require having all of the page data kmapped. Behold the smb_rqst struct. This new struct represents an array of kvecs immediately followed by an array of pages. Convert the signing routines to use these structs under the hood and turn the existing functions for this into wrappers around that. For now, we're just changing these functions to take different args. Later, we'll teach them how to deal with arrays of pages. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 26 ++++++++++++++++++-------- fs/cifs/cifsglob.h | 14 ++++++++++++++ fs/cifs/cifsproto.h | 7 +++++-- fs/cifs/cifssmb.c | 7 ++++--- fs/cifs/transport.c | 4 +++- 5 files changed, 44 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 724738c1a560..af520522ef20 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -37,11 +37,13 @@ * the sequence number before this function is called. Also, this function * should be called with the server->srv_mutex held. */ -static int cifs_calc_signature(const struct kvec *iov, int n_vec, +static int cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature) { int i; int rc; + struct kvec *iov = rqst->rq_iov; + int n_vec = rqst->rq_nvec; if (iov == NULL || signature == NULL || server == NULL) return -EINVAL; @@ -99,12 +101,12 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, } /* must be called with server->srv_mutex held */ -int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, +int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { int rc = 0; char smb_signature[20]; - struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; if ((cifs_pdu == NULL) || (server == NULL)) return -EINVAL; @@ -125,7 +127,7 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, *pexpected_response_sequence_number = server->sequence_number++; server->sequence_number++; - rc = cifs_calc_signature(iov, n_vec, server, smb_signature); + rc = cifs_calc_signature(rqst, server, smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -134,6 +136,15 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, return rc; } +int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, + __u32 *pexpected_response_sequence) +{ + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = n_vec }; + + return cifs_sign_rqst(&rqst, server, pexpected_response_sequence); +} + /* must be called with server->srv_mutex held */ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) @@ -147,14 +158,14 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, pexpected_response_sequence_number); } -int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, +int cifs_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 expected_sequence_number) { unsigned int rc; char server_response_sig[8]; char what_we_think_sig_should_be[20]; - struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; if (cifs_pdu == NULL || server == NULL) return -EINVAL; @@ -186,8 +197,7 @@ int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, cifs_pdu->Signature.Sequence.Reserved = 0; mutex_lock(&server->srv_mutex); - rc = cifs_calc_signature(iov, nr_iov, server, - what_we_think_sig_should_be); + rc = cifs_calc_signature(rqst, server, what_we_think_sig_should_be); mutex_unlock(&server->srv_mutex); if (rc) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 3c007fe641f9..5ea50dd316c5 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -162,6 +162,20 @@ struct cifs_cred { ***************************************************************** */ +/* + * A smb_rqst represents a complete request to be issued to a server. It's + * formed by a kvec array, followed by an array of pages. Page data is assumed + * to start at the beginning of the first page. + */ +struct smb_rqst { + struct kvec *rq_iov; /* array of kvecs */ + unsigned int rq_nvec; /* number of kvecs in array */ + struct page **rq_pages; /* pointer to array of page ptrs */ + unsigned int rq_npages; /* number pages in array */ + unsigned int rq_pagesz; /* page size to use */ + unsigned int rq_tailsz; /* length of last page */ +}; + enum smb_version { Smb_1 = 1, Smb_21, diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c7ad9a8cf82a..8e071a5a7da4 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -24,6 +24,7 @@ struct statfs; struct smb_vol; +struct smb_rqst; /* ***************************************************************** @@ -394,10 +395,12 @@ extern void sesInfoFree(struct cifs_ses *); extern struct cifs_tcon *tconInfoAlloc(void); extern void tconInfoFree(struct cifs_tcon *); -extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); +extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, + __u32 *pexpected_response_sequence_number); extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); -extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, +extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); +extern int cifs_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 expected_sequence_number); extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c4f43cf671dc..6786b5ee5326 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1541,6 +1541,8 @@ cifs_readv_callback(struct mid_q_entry *mid) struct cifs_readdata *rdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; + struct smb_rqst rqst = { .rq_iov = rdata->iov, + .rq_nvec = rdata->nr_iov }; cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, mid->mid, mid->mid_state, rdata->result, rdata->bytes); @@ -1552,9 +1554,8 @@ cifs_readv_callback(struct mid_q_entry *mid) (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { int rc = 0; - rc = cifs_verify_signature(rdata->iov, rdata->nr_iov, - server, - mid->sequence_number + 1); + rc = cifs_verify_signature(&rqst, server, + mid->sequence_number + 1); if (rc) cERROR(1, "SMB signature verification returned " "error = %d", rc); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c4d7825dfc0a..bc9ccddad937 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -504,11 +504,13 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { struct kvec iov; int rc = 0; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; iov.iov_base = mid->resp_buf; iov.iov_len = len; /* FIXME: add code to kill session */ - rc = cifs_verify_signature(&iov, 1, server, + rc = cifs_verify_signature(&rqst, server, mid->sequence_number + 1); if (rc) cERROR(1, "SMB signature verification returned error = " -- cgit v1.2.3 From 0b688cfc8b3472f5bad104abe0675a060e32ad7b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:34 -0700 Subject: cifs: change smb2 signing routines to use smb_rqst structs Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 5 +++-- fs/cifs/smb2proto.h | 4 ++-- fs/cifs/smb2transport.c | 44 ++++++++++++++++++++++---------------------- 3 files changed, 27 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 994c184ac9a9..e188d137cab2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1297,6 +1297,8 @@ smb2_readv_callback(struct mid_q_entry *mid) struct TCP_Server_Info *server = tcon->ses->server; struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base; unsigned int credits_received = 1; + struct smb_rqst rqst = { .rq_iov = rdata->iov, + .rq_nvec = rdata->nr_iov }; cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, mid->mid, mid->mid_state, rdata->result, rdata->bytes); @@ -1309,8 +1311,7 @@ smb2_readv_callback(struct mid_q_entry *mid) (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { int rc; - rc = smb2_verify_signature2(rdata->iov, rdata->nr_iov, - server); + rc = smb2_verify_signature(&rqst, server); if (rc) cERROR(1, "SMB signature verification returned " "error = %d", rc); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index a73a963af8f4..a9bda043e26e 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -26,6 +26,7 @@ #include struct statfs; +struct smb_rqst; /* ***************************************************************** @@ -39,8 +40,7 @@ extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); extern __le16 *cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb); -extern int smb2_verify_signature2(struct kvec *, unsigned int, - struct TCP_Server_Info *); +extern int smb2_verify_signature(struct smb_rqst *, struct TCP_Server_Info *); extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 66479f252ae5..1850d9ec3c90 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -39,12 +39,13 @@ #include "smb2glob.h" static int -smb2_calc_signature2(const struct kvec *iov, int n_vec, - struct TCP_Server_Info *server) +smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { int i, rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; unsigned char *sigptr = smb2_signature; + struct kvec *iov = rqst->rq_iov; + int n_vec = rqst->rq_nvec; struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); @@ -106,10 +107,10 @@ smb2_calc_signature2(const struct kvec *iov, int n_vec, /* must be called with server->srv_mutex held */ static int -smb2_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server) +smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) { int rc = 0; - struct smb2_hdr *smb2_pdu = iov[0].iov_base; + struct smb2_hdr *smb2_pdu = rqst->rq_iov[0].iov_base; if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) || server->tcpStatus == CifsNeedNegotiate) @@ -120,18 +121,17 @@ smb2_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server) return rc; } - rc = smb2_calc_signature2(iov, n_vec, server); + rc = smb2_calc_signature(rqst, server); return rc; } int -smb2_verify_signature2(struct kvec *iov, unsigned int n_vec, - struct TCP_Server_Info *server) +smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { unsigned int rc; char server_response_sig[16]; - struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; if ((smb2_pdu->Command == SMB2_NEGOTIATE) || (smb2_pdu->Command == SMB2_OPLOCK_BREAK) || @@ -157,7 +157,7 @@ smb2_verify_signature2(struct kvec *iov, unsigned int n_vec, memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE); mutex_lock(&server->srv_mutex); - rc = smb2_calc_signature2(iov, n_vec, server); + rc = smb2_calc_signature(rqst, server); mutex_unlock(&server->srv_mutex); if (rc) @@ -170,16 +170,6 @@ smb2_verify_signature2(struct kvec *iov, unsigned int n_vec, return 0; } -static int -smb2_verify_signature(struct smb2_hdr *smb2_pdu, struct TCP_Server_Info *server) -{ - struct kvec iov; - - iov.iov_base = (char *)smb2_pdu; - iov.iov_len = get_rfc1002_length(smb2_pdu) + 4; - return smb2_verify_signature2(&iov, 1, server); -} - /* * Set message id for the request. Should be called after wait_for_free_request * and when srv_mutex is held. @@ -258,6 +248,12 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error) { unsigned int len = get_rfc1002_length(mid->resp_buf); + struct kvec iov; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; + + iov.iov_base = (char *)mid->resp_buf; + iov.iov_len = get_rfc1002_length(mid->resp_buf) + 4; dump_smb(mid->resp_buf, min_t(u32, 80, len)); /* convert the length into a more usable form */ @@ -265,7 +261,7 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { int rc; - rc = smb2_verify_signature(mid->resp_buf, server); + rc = smb2_verify_signature(&rqst, server); if (rc) cERROR(1, "SMB signature verification returned error = " "%d", rc); @@ -281,13 +277,15 @@ smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, int rc; struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; struct mid_q_entry *mid; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = nvec }; smb2_seq_num_into_buf(ses->server, hdr); rc = smb2_get_mid_entry(ses, hdr, &mid); if (rc) return rc; - rc = smb2_sign_smb2(iov, nvec, ses->server); + rc = smb2_sign_rqst(&rqst, ses->server); if (rc) cifs_delete_mid(mid); *ret_mid = mid; @@ -301,6 +299,8 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, int rc = 0; struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; struct mid_q_entry *mid; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = nvec }; smb2_seq_num_into_buf(server, hdr); @@ -308,7 +308,7 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, if (mid == NULL) return -ENOMEM; - rc = smb2_sign_smb2(iov, nvec, server); + rc = smb2_sign_rqst(&rqst, server); if (rc) { DeleteMidQEntry(mid); return rc; -- cgit v1.2.3 From 6f49f46b187df34539f1e5df2469b8a541897700 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:34 -0700 Subject: cifs: convert send code to use smb_rqst structs Again, just a change in the arguments and some function renaming here. In later patches, we'll change this code to deal with page arrays. In this patch, we add a new smb_send_rqst wrapper and have smb_sendv call that. Then we move most of the existing smb_sendv code into a new function -- smb_send_kvec. This seems a little redundant, but later we'll flesh this out to deal with arrays of pages. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 135 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 90 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bc9ccddad937..766307b725bd 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -119,18 +119,29 @@ cifs_delete_mid(struct mid_q_entry *mid) DeleteMidQEntry(mid); } +/* + * smb_send_kvec - send an array of kvecs to the server + * @server: Server to send the data to + * @iov: Pointer to array of kvecs + * @n_vec: length of kvec array + * @sent: amount of data sent on socket is stored here + * + * Our basic "send data to server" function. Should be called with srv_mutex + * held. The caller is responsible for handling the results. + */ static int -smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) +smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, + size_t *sent) { int rc = 0; int i = 0; struct msghdr smb_msg; - unsigned int len = iov[0].iov_len; - unsigned int total_len; - int first_vec = 0; - unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + unsigned int remaining; + size_t first_vec = 0; struct socket *ssocket = server->ssocket; + *sent = 0; + if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -143,56 +154,60 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) else smb_msg.msg_flags = MSG_NOSIGNAL; - total_len = 0; + remaining = 0; for (i = 0; i < n_vec; i++) - total_len += iov[i].iov_len; - - cFYI(1, "Sending smb: total_len %d", total_len); - dump_smb(iov[0].iov_base, len); + remaining += iov[i].iov_len; i = 0; - while (total_len) { + while (remaining) { + /* + * If blocking send, we try 3 times, since each can block + * for 5 seconds. For nonblocking we have to try more + * but wait increasing amounts of time allowing time for + * socket to clear. The overall time we wait in either + * case to send on the socket is about 15 seconds. + * Similarly we wait for 15 seconds for a response from + * the server in SendReceive[2] for the server to send + * a response back for most types of requests (except + * SMB Write past end of file which can be slow, and + * blocking lock operations). NFS waits slightly longer + * than CIFS, but this can make it take longer for + * nonresponsive servers to be detected and 15 seconds + * is more than enough time for modern networks to + * send a packet. In most cases if we fail to send + * after the retries we will kill the socket and + * reconnect which may clear the network problem. + */ rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], - n_vec - first_vec, total_len); - if ((rc == -ENOSPC) || (rc == -EAGAIN)) { + n_vec - first_vec, remaining); + if (rc == -ENOSPC || rc == -EAGAIN) { i++; - /* - * If blocking send we try 3 times, since each can block - * for 5 seconds. For nonblocking we have to try more - * but wait increasing amounts of time allowing time for - * socket to clear. The overall time we wait in either - * case to send on the socket is about 15 seconds. - * Similarly we wait for 15 seconds for a response from - * the server in SendReceive[2] for the server to send - * a response back for most types of requests (except - * SMB Write past end of file which can be slow, and - * blocking lock operations). NFS waits slightly longer - * than CIFS, but this can make it take longer for - * nonresponsive servers to be detected and 15 seconds - * is more than enough time for modern networks to - * send a packet. In most cases if we fail to send - * after the retries we will kill the socket and - * reconnect which may clear the network problem. - */ - if ((i >= 14) || (!server->noblocksnd && (i > 2))) { - cERROR(1, "sends on sock %p stuck for 15 seconds", - ssocket); + if (i >= 14 || (!server->noblocksnd && (i > 2))) { + cERROR(1, "sends on sock %p stuck for 15 " + "seconds", ssocket); rc = -EAGAIN; break; } msleep(1 << i); continue; } + if (rc < 0) break; - if (rc == total_len) { - total_len = 0; + /* send was at least partially successful */ + *sent += rc; + + if (rc == remaining) { + remaining = 0; break; - } else if (rc > total_len) { - cERROR(1, "sent %d requested %d", rc, total_len); + } + + if (rc > remaining) { + cERROR(1, "sent %d requested %d", rc, remaining); break; } + if (rc == 0) { /* should never happen, letting socket clear before retrying is our only obvious option here */ @@ -200,7 +215,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) msleep(500); continue; } - total_len -= rc; + + remaining -= rc; + /* the line below resets i */ for (i = first_vec; i < n_vec; i++) { if (iov[i].iov_len) { @@ -215,16 +232,35 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) } } } + i = 0; /* in case we get ENOSPC on the next send */ + rc = 0; } + return rc; +} + +static int +smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + int rc; + struct kvec *iov = rqst->rq_iov; + int n_vec = rqst->rq_nvec; + unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + size_t total_len; + + cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); + dump_smb(iov[0].iov_base, iov[0].iov_len); + + rc = smb_send_kvec(server, iov, n_vec, &total_len); if ((total_len > 0) && (total_len != smb_buf_length + 4)) { - cFYI(1, "partial send (%d remaining), terminating session", - total_len); - /* If we have only sent part of an SMB then the next SMB - could be taken as the remainder of this one. We need - to kill the socket so the server throws away the partial - SMB */ + cFYI(1, "partial send (wanted=%u sent=%zu): terminating " + "session", smb_buf_length + 4, total_len); + /* + * If we have only sent part of an SMB then the next SMB could + * be taken as the remainder of this one. We need to kill the + * socket so the server throws away the partial SMB + */ server->tcpStatus = CifsNeedReconnect; } @@ -236,6 +272,15 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) return rc; } +static int +smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) +{ + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = n_vec }; + + return smb_send_rqst(server, &rqst); +} + int smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, unsigned int smb_buf_length) -- cgit v1.2.3 From b8eed28375a43e1c9aaa9d15af2a052aae0d0725 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:35 -0700 Subject: cifs: cork the socket before a send and uncork it afterward We want to send SMBs as "atomically" as possible. Prior to sending any data on the socket, cork it to make sure that no non-full frames go out. Afterward, uncork it to make sure all of the data gets pushed out to the wire. Note that this more or less renders the socket=TCP_NODELAY mount option obsolete. When TCP_CORK and TCP_NODELAY are used on the same socket, TCP_NODELAY is essentially ignored. Acked-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 4 ++++ fs/cifs/transport.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 549409b1c776..5210bc82b1dc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1680,6 +1680,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; + /* + * FIXME: since we now cork/uncork the socket while + * sending, should we deprecate this option? + */ if (strnicmp(string, "TCP_NODELAY", 11) == 0) vol->sockopt_tcp_nodelay = 1; break; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 766307b725bd..8ff4c5f36702 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -247,12 +248,23 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) int n_vec = rqst->rq_nvec; unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); size_t total_len; + struct socket *ssocket = server->ssocket; + int val = 1; cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); dump_smb(iov[0].iov_base, iov[0].iov_len); + /* cork the socket */ + kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, + (char *)&val, sizeof(val)); + rc = smb_send_kvec(server, iov, n_vec, &total_len); + /* uncork it */ + val = 0; + kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, + (char *)&val, sizeof(val)); + if ((total_len > 0) && (total_len != smb_buf_length + 4)) { cFYI(1, "partial send (wanted=%u sent=%zu): terminating " "session", smb_buf_length + 4, total_len); -- cgit v1.2.3 From 97bc00b39408a4180eeeaa976d02d37121488997 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:35 -0700 Subject: cifs: teach smb_send_rqst how to handle arrays of pages Add code that allows smb_send_rqst to send an array of pages after the initial kvec array has been sent. For now, we simply kmap the page array and send it using the standard smb_send_kvec function. Eventually, we may want to convert this code to use kernel_sendpage under the hood and avoid the kmap altogether for the page data. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 8ff4c5f36702..381dbb778478 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -240,6 +241,38 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, return rc; } +/** + * rqst_page_to_kvec - Turn a slot in the smb_rqst page array into a kvec + * @rqst: pointer to smb_rqst + * @idx: index into the array of the page + * @iov: pointer to struct kvec that will hold the result + * + * Helper function to convert a slot in the rqst->rq_pages array into a kvec. + * The page will be kmapped and the address placed into iov_base. The length + * will then be adjusted according to the ptailoff. + */ +static void +cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, + struct kvec *iov) +{ + /* + * FIXME: We could avoid this kmap altogether if we used + * kernel_sendpage instead of kernel_sendmsg. That will only + * work if signing is disabled though as sendpage inlines the + * page directly into the fraglist. If userspace modifies the + * page after we calculate the signature, then the server will + * reject it and may break the connection. kernel_sendmsg does + * an extra copy of the data and avoids that issue. + */ + iov->iov_base = kmap(rqst->rq_pages[idx]); + + /* if last page, don't send beyond this offset into page */ + if (idx == (rqst->rq_npages - 1)) + iov->iov_len = rqst->rq_tailsz; + else + iov->iov_len = rqst->rq_pagesz; +} + static int smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) { @@ -247,7 +280,8 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); - size_t total_len; + unsigned int i; + size_t total_len = 0, sent; struct socket *ssocket = server->ssocket; int val = 1; @@ -258,8 +292,26 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, (char *)&val, sizeof(val)); - rc = smb_send_kvec(server, iov, n_vec, &total_len); + rc = smb_send_kvec(server, iov, n_vec, &sent); + if (rc < 0) + goto uncork; + + total_len += sent; + + /* now walk the page array and send each page in it */ + for (i = 0; i < rqst->rq_npages; i++) { + struct kvec p_iov; + + cifs_rqst_page_to_kvec(rqst, i, &p_iov); + rc = smb_send_kvec(server, &p_iov, 1, &sent); + kunmap(rqst->rq_pages[i]); + if (rc < 0) + break; + + total_len += sent; + } +uncork: /* uncork it */ val = 0; kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, -- cgit v1.2.3 From fb308a6f22f7f4f3574dab6b36c4a3598e50cf05 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:35 -0700 Subject: cifs: teach signing routines how to deal with arrays of pages in a smb_rqst Use the smb_send_rqst helper function to kmap each page in the array and update the hash for that chunk. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 11 +++++++++++ fs/cifs/cifsproto.h | 2 ++ fs/cifs/smb2transport.c | 11 +++++++++++ fs/cifs/transport.c | 2 +- 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index af520522ef20..652f5051be09 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -29,6 +29,7 @@ #include "ntlmssp.h" #include #include +#include /* * Calculate and return the CIFS signature based on the mac key and SMB PDU. @@ -93,6 +94,16 @@ static int cifs_calc_signature(struct smb_rqst *rqst, } } + /* now hash over the rq_pages array */ + for (i = 0; i < rqst->rq_npages; i++) { + struct kvec p_iov; + + cifs_rqst_page_to_kvec(rqst, i, &p_iov); + crypto_shash_update(&server->secmech.sdescmd5->shash, + p_iov.iov_base, p_iov.iov_len); + kunmap(rqst->rq_pages[i]); + } + rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); if (rc) cERROR(1, "%s: Could not generate md5 hash", __func__); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 8e071a5a7da4..e97a1843ab98 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,6 +36,8 @@ extern struct smb_hdr *cifs_buf_get(void); extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); +extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, + struct kvec *iov); extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, unsigned int /* length */); extern unsigned int _get_xid(void); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 1850d9ec3c90..9ca4bcfb34c6 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "smb2pdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -95,6 +96,16 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } } + /* now hash over the rq_pages array */ + for (i = 0; i < rqst->rq_npages; i++) { + struct kvec p_iov; + + cifs_rqst_page_to_kvec(rqst, i, &p_iov); + crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + p_iov.iov_base, p_iov.iov_len); + kunmap(rqst->rq_pages[i]); + } + rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, sigptr); if (rc) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 381dbb778478..b6097344cd5b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -251,7 +251,7 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, * The page will be kmapped and the address placed into iov_base. The length * will then be adjusted according to the ptailoff. */ -static void +void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, struct kvec *iov) { -- cgit v1.2.3 From fec344e3f31aa911297cd3a4639432d983b1f324 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:35 -0700 Subject: cifs: change cifs_call_async to use smb_rqst structs For now, none of the callers populate rq_pages. That will be done for writes in a later patch. While we're at it, change the prototype of setup_async_request not to need a return pointer argument. Just return the pointer to the mid_q_entry or an ERR_PTR. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 8 +++---- fs/cifs/cifsproto.h | 16 +++++++------- fs/cifs/cifssmb.c | 20 +++++++++++------- fs/cifs/smb2pdu.c | 15 +++++++++---- fs/cifs/smb2proto.h | 9 ++++---- fs/cifs/smb2transport.c | 40 +++++++++++++++-------------------- fs/cifs/transport.c | 56 ++++++++++++++++++++++++------------------------- 7 files changed, 85 insertions(+), 79 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5ea50dd316c5..a81790005e57 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -201,11 +201,11 @@ struct smb_version_operations { struct mid_q_entry *); bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); /* setup request: allocate mid, sign message */ - int (*setup_request)(struct cifs_ses *, struct kvec *, unsigned int, - struct mid_q_entry **); + struct mid_q_entry *(*setup_request)(struct cifs_ses *, + struct smb_rqst *); /* setup async request: allocate mid, sign message */ - int (*setup_async_request)(struct TCP_Server_Info *, struct kvec *, - unsigned int, struct mid_q_entry **); + struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *, + struct smb_rqst *); /* check response: verify signature, map error */ int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e97a1843ab98..3b628f2af258 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -70,20 +70,20 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern void cifs_delete_mid(struct mid_q_entry *mid); extern void cifs_wake_up_task(struct mid_q_entry *mid); -extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, mid_receive_t *receive, - mid_callback_t *callback, void *cbdata, - const int flags); +extern int cifs_call_async(struct TCP_Server_Info *server, + struct smb_rqst *rqst, + mid_receive_t *receive, mid_callback_t *callback, + void *cbdata, const int flags); extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , int * /* bytes returned */ , const int); extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, char *in_buf, int flags); -extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int, - struct mid_q_entry **); -extern int cifs_setup_async_request(struct TCP_Server_Info *, struct kvec *, - unsigned int, struct mid_q_entry **); +extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *, + struct smb_rqst *); +extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, + struct smb_rqst *); extern int cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6786b5ee5326..2f86c84468cb 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -725,6 +725,8 @@ CIFSSMBEcho(struct TCP_Server_Info *server) ECHO_REQ *smb; int rc = 0; struct kvec iov; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; cFYI(1, "In echo request"); @@ -742,7 +744,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server) iov.iov_base = smb; iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; - rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback, + rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback, server, CIFS_ASYNC_OP | CIFS_ECHO_OP); if (rc) cFYI(1, "Echo request failed: %d", rc); @@ -1585,6 +1587,8 @@ cifs_async_readv(struct cifs_readdata *rdata) READ_REQ *smb = NULL; int wct; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); + struct smb_rqst rqst = { .rq_iov = rdata->iov, + .rq_nvec = 1 }; cFYI(1, "%s: offset=%llu bytes=%u", __func__, rdata->offset, rdata->bytes); @@ -1628,9 +1632,8 @@ cifs_async_readv(struct cifs_readdata *rdata) rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; kref_get(&rdata->refcount); - rc = cifs_call_async(tcon->ses->server, rdata->iov, 1, - cifs_readv_receive, cifs_readv_callback, - rdata, 0); + rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive, + cifs_readv_callback, rdata, 0); if (rc == 0) cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); @@ -2035,6 +2038,7 @@ cifs_async_writev(struct cifs_writedata *wdata) int wct; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); struct kvec *iov = NULL; + struct smb_rqst rqst = { }; if (tcon->ses->capabilities & CAP_LARGE_FILES) { wct = 14; @@ -2051,11 +2055,13 @@ cifs_async_writev(struct cifs_writedata *wdata) goto async_writev_out; /* 1 iov per page + 1 for header */ - iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS); + rqst.rq_nvec = wdata->nr_pages + 1; + iov = kzalloc((rqst.rq_nvec) * sizeof(*iov), GFP_NOFS); if (iov == NULL) { rc = -ENOMEM; goto async_writev_out; } + rqst.rq_iov = iov; smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); @@ -2104,8 +2110,8 @@ cifs_async_writev(struct cifs_writedata *wdata) } kref_get(&wdata->refcount); - rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, - NULL, cifs_writev_callback, wdata, 0); + rc = cifs_call_async(tcon->ses->server, &rqst, NULL, + cifs_writev_callback, wdata, 0); if (rc == 0) cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e188d137cab2..a04301b69b4e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1171,6 +1171,8 @@ SMB2_echo(struct TCP_Server_Info *server) struct smb2_echo_req *req; int rc = 0; struct kvec iov; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; cFYI(1, "In echo request"); @@ -1184,7 +1186,7 @@ SMB2_echo(struct TCP_Server_Info *server) /* 4 for rfc1002 length field */ iov.iov_len = get_rfc1002_length(req) + 4; - rc = cifs_call_async(server, &iov, 1, NULL, smb2_echo_callback, server, + rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server, CIFS_ECHO_OP); if (rc) cFYI(1, "Echo request failed: %d", rc); @@ -1344,6 +1346,8 @@ smb2_async_readv(struct cifs_readdata *rdata) int rc; struct smb2_hdr *buf; struct cifs_io_parms io_parms; + struct smb_rqst rqst = { .rq_iov = rdata->iov, + .rq_nvec = 1 }; cFYI(1, "%s: offset=%llu bytes=%u", __func__, rdata->offset, rdata->bytes); @@ -1363,7 +1367,7 @@ smb2_async_readv(struct cifs_readdata *rdata) rdata->iov[0].iov_len = get_rfc1002_length(rdata->iov[0].iov_base) + 4; kref_get(&rdata->refcount); - rc = cifs_call_async(io_parms.tcon->ses->server, rdata->iov, 1, + rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, cifs_readv_receive, smb2_readv_callback, rdata, 0); if (rc) @@ -1484,6 +1488,7 @@ smb2_async_writev(struct cifs_writedata *wdata) struct smb2_write_req *req = NULL; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); struct kvec *iov = NULL; + struct smb_rqst rqst; rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); if (rc) @@ -1495,6 +1500,8 @@ smb2_async_writev(struct cifs_writedata *wdata) rc = -ENOMEM; goto async_writev_out; } + rqst.rq_iov = iov; + rqst.rq_nvec = wdata->nr_pages + 1; req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); @@ -1530,8 +1537,8 @@ smb2_async_writev(struct cifs_writedata *wdata) inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); kref_get(&wdata->refcount); - rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, - NULL, smb2_writev_callback, wdata, 0); + rc = cifs_call_async(tcon->ses->server, &rqst, NULL, + smb2_writev_callback, wdata, 0); if (rc) kref_put(&wdata->refcount, cifs_writedata_release); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index a9bda043e26e..aeb30dbdf8b8 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -43,11 +43,10 @@ extern __le16 *cifs_convert_path_to_utf16(const char *from, extern int smb2_verify_signature(struct smb_rqst *, struct TCP_Server_Info *); extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); -extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid); -extern int smb2_setup_async_request(struct TCP_Server_Info *server, - struct kvec *iov, unsigned int nvec, - struct mid_q_entry **ret_mid); +extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, + struct smb_rqst *rqst); +extern struct mid_q_entry *smb2_setup_async_request( + struct TCP_Server_Info *server, struct smb_rqst *rqst); extern void smb2_echo_request(struct work_struct *work); extern bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 9ca4bcfb34c6..2a5fdf26f79f 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -281,50 +281,44 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, return map_smb2_to_linux_error(mid->resp_buf, log_error); } -int -smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) { int rc; - struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; + struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = nvec }; smb2_seq_num_into_buf(ses->server, hdr); rc = smb2_get_mid_entry(ses, hdr, &mid); if (rc) - return rc; - rc = smb2_sign_rqst(&rqst, ses->server); - if (rc) + return ERR_PTR(rc); + rc = smb2_sign_rqst(rqst, ses->server); + if (rc) { cifs_delete_mid(mid); - *ret_mid = mid; - return rc; + return ERR_PTR(rc); + } + return mid; } -int -smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) { - int rc = 0; - struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; + int rc; + struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = nvec }; smb2_seq_num_into_buf(server, hdr); mid = smb2_mid_entry_alloc(hdr, server); if (mid == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - rc = smb2_sign_rqst(&rqst, server); + rc = smb2_sign_rqst(rqst, server); if (rc) { DeleteMidQEntry(mid); - return rc; + return ERR_PTR(rc); } - *ret_mid = mid; - return rc; + return mid; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b6097344cd5b..2126ab185045 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -454,12 +454,11 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) return 0; } -int -cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) { int rc; - struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; /* enable signing if server requires it */ @@ -468,16 +467,15 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, mid = AllocMidQEntry(hdr, server); if (mid == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - rc = cifs_sign_smbv(iov, nvec, server, &mid->sequence_number); + rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); if (rc) { DeleteMidQEntry(mid); - return rc; + return ERR_PTR(rc); } - *ret_mid = mid; - return 0; + return mid; } /* @@ -485,9 +483,9 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, * the result. Caller is responsible for dealing with timeouts. */ int -cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, mid_receive_t *receive, - mid_callback_t *callback, void *cbdata, const int flags) +cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, + mid_receive_t *receive, mid_callback_t *callback, + void *cbdata, const int flags) { int rc, timeout, optype; struct mid_q_entry *mid; @@ -500,12 +498,12 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, return rc; mutex_lock(&server->srv_mutex); - rc = server->ops->setup_async_request(server, iov, nvec, &mid); - if (rc) { + mid = server->ops->setup_async_request(server, rqst); + if (IS_ERR(mid)) { mutex_unlock(&server->srv_mutex); add_credits(server, 1, optype); wake_up(&server->request_q); - return rc; + return PTR_ERR(mid); } mid->receive = receive; @@ -520,7 +518,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, cifs_in_send_inc(server); - rc = smb_sendv(server, iov, nvec); + rc = smb_send_rqst(server, rqst); cifs_in_send_dec(server); cifs_save_when_sent(mid); mutex_unlock(&server->srv_mutex); @@ -630,22 +628,22 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, return map_smb_to_linux_error(mid->resp_buf, log_error); } -int -cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) { int rc; - struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; rc = allocate_mid(ses, hdr, &mid); if (rc) - return rc; - rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number); - if (rc) + return ERR_PTR(rc); + rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); + if (rc) { cifs_delete_mid(mid); - *ret_mid = mid; - return rc; + return ERR_PTR(rc); + } + return mid; } int @@ -658,6 +656,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, struct mid_q_entry *midQ; char *buf = iov[0].iov_base; unsigned int credits = 1; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = n_vec }; timeout = flags & CIFS_TIMEOUT_MASK; optype = flags & CIFS_OP_MASK; @@ -695,13 +695,13 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, mutex_lock(&ses->server->srv_mutex); - rc = ses->server->ops->setup_request(ses, iov, n_vec, &midQ); - if (rc) { + midQ = ses->server->ops->setup_request(ses, &rqst); + if (IS_ERR(midQ)) { mutex_unlock(&ses->server->srv_mutex); cifs_small_buf_release(buf); /* Update # of requests on wire to server */ add_credits(ses->server, 1, optype); - return rc; + return PTR_ERR(midQ); } midQ->mid_state = MID_REQUEST_SUBMITTED; -- cgit v1.2.3 From eddb079deb4deb1259f87425094c7a586fc59313 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:35 -0700 Subject: cifs: convert async write code to pass in data via rq_pages array Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 4 ++-- fs/cifs/cifssmb.c | 39 +++++++++++---------------------------- fs/cifs/file.c | 48 ++++++++++-------------------------------------- fs/cifs/smb2pdu.c | 37 ++++++++++--------------------------- 4 files changed, 33 insertions(+), 95 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a81790005e57..cc70ac0bac47 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -999,8 +999,8 @@ struct cifs_writedata { pid_t pid; unsigned int bytes; int result; - void (*marshal_iov) (struct kvec *iov, - struct cifs_writedata *wdata); + unsigned int pagesz; + unsigned int tailsz; unsigned int nr_pages; struct page *pages[1]; }; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2f86c84468cb..a110e0784221 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2033,11 +2033,11 @@ cifs_writev_callback(struct mid_q_entry *mid) int cifs_async_writev(struct cifs_writedata *wdata) { - int i, rc = -EACCES; + int rc = -EACCES; WRITE_REQ *smb = NULL; int wct; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); - struct kvec *iov = NULL; + struct kvec iov; struct smb_rqst rqst = { }; if (tcon->ses->capabilities & CAP_LARGE_FILES) { @@ -2054,15 +2054,6 @@ cifs_async_writev(struct cifs_writedata *wdata) if (rc) goto async_writev_out; - /* 1 iov per page + 1 for header */ - rqst.rq_nvec = wdata->nr_pages + 1; - iov = kzalloc((rqst.rq_nvec) * sizeof(*iov), GFP_NOFS); - if (iov == NULL) { - rc = -ENOMEM; - goto async_writev_out; - } - rqst.rq_iov = iov; - smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); @@ -2079,18 +2070,15 @@ cifs_async_writev(struct cifs_writedata *wdata) cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); /* 4 for RFC1001 length + 1 for BCC */ - iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; - iov[0].iov_base = smb; + iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; + iov.iov_base = smb; - /* - * This function should marshal up the page array into the kvec - * array, reserving [0] for the header. It should kmap the pages - * and set the iov_len properly for each one. It may also set - * wdata->bytes too. - */ - cifs_kmap_lock(); - wdata->marshal_iov(iov, wdata); - cifs_kmap_unlock(); + rqst.rq_iov = &iov; + rqst.rq_nvec = 1; + rqst.rq_pages = wdata->pages; + rqst.rq_npages = wdata->nr_pages; + rqst.rq_pagesz = wdata->pagesz; + rqst.rq_tailsz = wdata->tailsz; cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); @@ -2106,7 +2094,7 @@ cifs_async_writev(struct cifs_writedata *wdata) (struct smb_com_writex_req *)smb; inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); put_bcc(wdata->bytes + 5, &smbw->hdr); - iov[0].iov_len += 4; /* pad bigger by four bytes */ + iov.iov_len += 4; /* pad bigger by four bytes */ } kref_get(&wdata->refcount); @@ -2118,13 +2106,8 @@ cifs_async_writev(struct cifs_writedata *wdata) else kref_put(&wdata->refcount, cifs_writedata_release); - /* send is done, unmap pages */ - for (i = 0; i < wdata->nr_pages; i++) - kunmap(wdata->pages[i]); - async_writev_out: cifs_small_buf_release(smb); - kfree(iov); return rc; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2418618118b6..8a781226ae33 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1738,27 +1738,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } -/* - * Marshal up the iov array, reserving the first one for the header. Also, - * set wdata->bytes. - */ -static void -cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata) -{ - int i; - struct inode *inode = wdata->cfile->dentry->d_inode; - loff_t size = i_size_read(inode); - - /* marshal up the pages into iov array */ - wdata->bytes = 0; - for (i = 0; i < wdata->nr_pages; i++) { - iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]), - (loff_t)PAGE_CACHE_SIZE); - iov[i + 1].iov_base = kmap(wdata->pages[i]); - wdata->bytes += iov[i + 1].iov_len; - } -} - static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1769,6 +1748,7 @@ static int cifs_writepages(struct address_space *mapping, struct TCP_Server_Info *server; struct page *page; int rc = 0; + loff_t isize = i_size_read(mapping->host); /* * If wsize is smaller than the page cache size, default to writing @@ -1873,7 +1853,7 @@ retry: */ set_page_writeback(page); - if (page_offset(page) >= mapping->host->i_size) { + if (page_offset(page) >= isize) { done = true; unlock_page(page); end_page_writeback(page); @@ -1904,7 +1884,12 @@ retry: wdata->sync_mode = wbc->sync_mode; wdata->nr_pages = nr_pages; wdata->offset = page_offset(wdata->pages[0]); - wdata->marshal_iov = cifs_writepages_marshal_iov; + wdata->pagesz = PAGE_CACHE_SIZE; + wdata->tailsz = + min(isize - page_offset(wdata->pages[nr_pages - 1]), + (loff_t)PAGE_CACHE_SIZE); + wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + + wdata->tailsz; do { if (wdata->cfile != NULL) @@ -2205,20 +2190,6 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) return num_pages; } -static void -cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata) -{ - int i; - size_t bytes = wdata->bytes; - - /* marshal up the pages into iov array */ - for (i = 0; i < wdata->nr_pages; i++) { - iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE); - iov[i + 1].iov_base = kmap(wdata->pages[i]); - bytes -= iov[i + 1].iov_len; - } -} - static void cifs_uncached_writev_complete(struct work_struct *work) { @@ -2339,7 +2310,8 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, wdata->cfile = cifsFileInfo_get(open_file); wdata->pid = pid; wdata->bytes = cur_len; - wdata->marshal_iov = cifs_uncached_marshal_iov; + wdata->pagesz = PAGE_SIZE; + wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); rc = cifs_uncached_retry_writev(wdata); if (rc) { kref_put(&wdata->refcount, cifs_writedata_release); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a04301b69b4e..68023d23702b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1484,25 +1484,16 @@ smb2_writev_callback(struct mid_q_entry *mid) int smb2_async_writev(struct cifs_writedata *wdata) { - int i, rc = -EACCES; + int rc = -EACCES; struct smb2_write_req *req = NULL; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); - struct kvec *iov = NULL; + struct kvec iov; struct smb_rqst rqst; rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); if (rc) goto async_writev_out; - /* 1 iov per page + 1 for header */ - iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS); - if (iov == NULL) { - rc = -ENOMEM; - goto async_writev_out; - } - rqst.rq_iov = iov; - rqst.rq_nvec = wdata->nr_pages + 1; - req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); req->PersistentFileId = wdata->cfile->fid.persistent_fid; @@ -1517,18 +1508,15 @@ smb2_async_writev(struct cifs_writedata *wdata) req->RemainingBytes = 0; /* 4 for rfc1002 length field and 1 for Buffer */ - iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; - iov[0].iov_base = (char *)req; + iov.iov_len = get_rfc1002_length(req) + 4 - 1; + iov.iov_base = req; - /* - * This function should marshal up the page array into the kvec - * array, reserving [0] for the header. It should kmap the pages - * and set the iov_len properly for each one. It may also set - * wdata->bytes too. - */ - cifs_kmap_lock(); - wdata->marshal_iov(iov, wdata); - cifs_kmap_unlock(); + rqst.rq_iov = &iov; + rqst.rq_nvec = 1; + rqst.rq_pages = wdata->pages; + rqst.rq_npages = wdata->nr_pages; + rqst.rq_pagesz = wdata->pagesz; + rqst.rq_tailsz = wdata->tailsz; cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); @@ -1543,13 +1531,8 @@ smb2_async_writev(struct cifs_writedata *wdata) if (rc) kref_put(&wdata->refcount, cifs_writedata_release); - /* send is done, unmap pages */ - for (i = 0; i < wdata->nr_pages; i++) - kunmap(wdata->pages[i]); - async_writev_out: cifs_small_buf_release(req); - kfree(iov); return rc; } -- cgit v1.2.3 From c84ce4a7b22aa54017c61217f8a0e5e24d593da1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:36 -0700 Subject: cifs: remove the kmap size limit from wsize Now that we're not kmapping so much at once, there's no need to cap the wsize at the amount that can be simultaneously kmapped. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/smb1ops.c | 3 --- fs/cifs/smb2ops.c | 3 --- 2 files changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index f6c7a1c9a1b6..8727ef712a3c 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -442,9 +442,6 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) wsize = min_t(unsigned int, wsize, server->maxBuf - sizeof(WRITE_REQ) + 4); - /* limit to the amount that we can kmap at once */ - wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); - /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3a8c68258026..a621d6125367 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -187,9 +187,6 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) */ wsize = min_t(unsigned int, wsize, 2 << 15); - /* limit to the amount that we can kmap at once */ - wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); - return wsize; } -- cgit v1.2.3 From 67c1f5295150eb86d065d57b4515a472ecbf008f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:36 -0700 Subject: cifs: add deprecation warning to sockopt=TCP_NODELAY option Now that we're using TCP_CORK on the socket, there's no value in continuting to support this option. Schedule it for removal in 3.9. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton --- fs/cifs/connect.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5210bc82b1dc..443e39633107 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1680,12 +1680,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - /* - * FIXME: since we now cork/uncork the socket while - * sending, should we deprecate this option? - */ - if (strnicmp(string, "TCP_NODELAY", 11) == 0) + if (strnicmp(string, "TCP_NODELAY", 11) == 0) { + printk(KERN_WARNING "CIFS: the " + "sockopt=TCP_NODELAY option has been " + "deprecated and will be removed " + "in 3.9\n"); vol->sockopt_tcp_nodelay = 1; + } break; case Opt_netbiosname: string = match_strdup(args); -- cgit v1.2.3 From f4e49cd2dce2ccac6feae64fbb4e90f7d8baf370 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Sep 2012 16:20:36 -0700 Subject: cifs: allocate kvec array for cifs_readdata as a separate allocation Eventually, we're going to want to append a list of pages to cifs_readdata instead of a list of kvecs. To prepare for that, turn the kvec array allocation into a separate one and just keep a pointer to it in the readdata. Signed-off-by: Jeff Layton --- fs/cifs/cifsglob.h | 2 +- fs/cifs/file.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cc70ac0bac47..737289b50ca5 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -982,7 +982,7 @@ struct cifs_readdata { int (*marshal_iov) (struct cifs_readdata *rdata, unsigned int remaining); unsigned int nr_iov; - struct kvec iov[1]; + struct kvec *iov; }; struct cifs_writedata; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8a781226ae33..61b7c834069f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2410,19 +2410,27 @@ ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, } static struct cifs_readdata * -cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete) +cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete) { struct cifs_readdata *rdata; + struct kvec *iov; - rdata = kzalloc(sizeof(*rdata) + - sizeof(struct kvec) * nr_vecs, GFP_KERNEL); + iov = kzalloc(sizeof(*iov) * (nr_pages + 1), GFP_KERNEL); + if (!iov) + return (struct cifs_readdata *)iov; + + rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); if (rdata != NULL) { kref_init(&rdata->refcount); INIT_LIST_HEAD(&rdata->list); init_completion(&rdata->done); INIT_WORK(&rdata->work, complete); INIT_LIST_HEAD(&rdata->pages); + rdata->iov = iov; + } else { + kfree(iov); } + return rdata; } @@ -2435,6 +2443,7 @@ cifs_readdata_release(struct kref *refcount) if (rdata->cfile) cifsFileInfo_put(rdata->cfile); + kfree(rdata->iov); kfree(rdata); } -- cgit v1.2.3 From c5fab6f4f081afcfcd7c1d444d9b900d6ef3e50b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 06:22:30 -0700 Subject: cifs: turn the pages list in cifs_readdata into an array We'll need an array to put into a smb_rqst, so convert this into an array instead of (ab)using the lru list_head. Signed-off-by: Jeff Layton --- fs/cifs/cifsglob.h | 3 +- fs/cifs/file.c | 87 +++++++++++++++++++++++++++++++----------------------- 2 files changed, 52 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 737289b50ca5..b70863ebedf2 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -977,12 +977,13 @@ struct cifs_readdata { unsigned int bytes; pid_t pid; int result; - struct list_head pages; struct work_struct work; int (*marshal_iov) (struct cifs_readdata *rdata, unsigned int remaining); unsigned int nr_iov; struct kvec *iov; + unsigned int nr_pages; + struct page *pages[]; }; struct cifs_writedata; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 61b7c834069f..6eaf48270c97 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2419,13 +2419,13 @@ cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete) if (!iov) return (struct cifs_readdata *)iov; - rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); + rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages), + GFP_KERNEL); if (rdata != NULL) { kref_init(&rdata->refcount); INIT_LIST_HEAD(&rdata->list); init_completion(&rdata->done); INIT_WORK(&rdata->work, complete); - INIT_LIST_HEAD(&rdata->pages); rdata->iov = iov; } else { kfree(iov); @@ -2448,25 +2448,25 @@ cifs_readdata_release(struct kref *refcount) } static int -cifs_read_allocate_pages(struct list_head *list, unsigned int npages) +cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages) { int rc = 0; - struct page *page, *tpage; + struct page *page; unsigned int i; - for (i = 0; i < npages; i++) { + for (i = 0; i < nr_pages; i++) { page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); if (!page) { rc = -ENOMEM; break; } - list_add(&page->lru, list); + rdata->pages[i] = page; } if (rc) { - list_for_each_entry_safe(page, tpage, list, lru) { - list_del(&page->lru); - put_page(page); + for (i = 0; i < nr_pages; i++) { + put_page(rdata->pages[i]); + rdata->pages[i] = NULL; } } return rc; @@ -2475,13 +2475,13 @@ cifs_read_allocate_pages(struct list_head *list, unsigned int npages) static void cifs_uncached_readdata_release(struct kref *refcount) { - struct page *page, *tpage; struct cifs_readdata *rdata = container_of(refcount, struct cifs_readdata, refcount); + unsigned int i; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { - list_del(&page->lru); - put_page(page); + for (i = 0; i < rdata->nr_pages; i++) { + put_page(rdata->pages[i]); + rdata->pages[i] = NULL; } cifs_readdata_release(refcount); } @@ -2525,17 +2525,18 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, int rc = 0; struct iov_iter ii; size_t pos = rdata->offset - offset; - struct page *page, *tpage; ssize_t remaining = rdata->bytes; unsigned char *pdata; + unsigned int i; /* set up iov_iter and advance to the correct offset */ iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0); iov_iter_advance(&ii, pos); *copied = 0; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { + for (i = 0; i < rdata->nr_pages; i++) { ssize_t copy; + struct page *page = rdata->pages[i]; /* copy a whole page or whatever's left */ copy = min_t(ssize_t, remaining, PAGE_SIZE); @@ -2555,9 +2556,6 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, iov_iter_advance(&ii, copy); } } - - list_del(&page->lru); - put_page(page); } return rc; @@ -2568,13 +2566,12 @@ cifs_uncached_readv_complete(struct work_struct *work) { struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); + unsigned int i; /* if the result is non-zero then the pages weren't kmapped */ if (rdata->result == 0) { - struct page *page; - - list_for_each_entry(page, &rdata->pages, lru) - kunmap(page); + for (i = 0; i < rdata->nr_pages; i++) + kunmap(rdata->pages[i]); } complete(&rdata->done); @@ -2586,10 +2583,13 @@ cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) { int len = 0; - struct page *page, *tpage; + unsigned int i; + unsigned int nr_pages = rdata->nr_pages; rdata->nr_iov = 1; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { + for (i = 0; i < nr_pages; i++) { + struct page *page = rdata->pages[i]; + if (remaining >= PAGE_SIZE) { /* enough data to fill the page */ rdata->iov[rdata->nr_iov].iov_base = kmap(page); @@ -2616,7 +2616,8 @@ cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata, remaining = 0; } else { /* no need to hold page hostage */ - list_del(&page->lru); + rdata->pages[i] = NULL; + rdata->nr_pages--; put_page(page); } } @@ -2675,11 +2676,12 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, goto error; } - rc = cifs_read_allocate_pages(&rdata->pages, npages); + rc = cifs_read_allocate_pages(rdata, npages); if (rc) goto error; rdata->cfile = cifsFileInfo_get(open_file); + rdata->nr_pages = npages; rdata->offset = offset; rdata->bytes = cur_len; rdata->pid = pid; @@ -2923,12 +2925,13 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) static void cifs_readv_complete(struct work_struct *work) { + unsigned int i; struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); - struct page *page, *tpage; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { - list_del(&page->lru); + for (i = 0; i < rdata->nr_pages; i++) { + struct page *page = rdata->pages[i]; + lru_cache_add_file(page); if (rdata->result == 0) { @@ -2943,6 +2946,7 @@ cifs_readv_complete(struct work_struct *work) cifs_readpage_to_fscache(rdata->mapping->host, page); page_cache_release(page); + rdata->pages[i] = NULL; } kref_put(&rdata->refcount, cifs_readdata_release); } @@ -2951,9 +2955,10 @@ static int cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) { int len = 0; - struct page *page, *tpage; + unsigned int i; u64 eof; pgoff_t eof_index; + unsigned int nr_pages = rdata->nr_pages; /* determine the eof that the server (probably) has */ eof = CIFS_I(rdata->mapping->host)->server_eof; @@ -2961,7 +2966,9 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); rdata->nr_iov = 1; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { + for (i = 0; i < nr_pages; i++) { + struct page *page = rdata->pages[i]; + if (remaining >= PAGE_CACHE_SIZE) { /* enough data to fill the page */ rdata->iov[rdata->nr_iov].iov_base = kmap(page); @@ -2996,18 +3003,20 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) * fill them until the writes are flushed. */ zero_user(page, 0, PAGE_CACHE_SIZE); - list_del(&page->lru); lru_cache_add_file(page); flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); page_cache_release(page); + rdata->pages[i] = NULL; + rdata->nr_pages--; } else { /* no need to hold page hostage */ - list_del(&page->lru); lru_cache_add_file(page); unlock_page(page); page_cache_release(page); + rdata->pages[i] = NULL; + rdata->nr_pages--; } } @@ -3065,6 +3074,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * the rdata->pages, then we want them in increasing order. */ while (!list_empty(page_list)) { + unsigned int i; unsigned int bytes = PAGE_CACHE_SIZE; unsigned int expected_index; unsigned int nr_pages = 1; @@ -3135,13 +3145,16 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata->bytes = bytes; rdata->pid = pid; rdata->marshal_iov = cifs_readpages_marshal_iov; - list_splice_init(&tmplist, &rdata->pages); + + list_for_each_entry_safe(page, tpage, &tmplist, lru) { + list_del(&page->lru); + rdata->pages[rdata->nr_pages++] = page; + } rc = cifs_retry_async_readv(rdata); if (rc != 0) { - list_for_each_entry_safe(page, tpage, &rdata->pages, - lru) { - list_del(&page->lru); + for (i = 0; i < rdata->nr_pages; i++) { + page = rdata->pages[i]; lru_cache_add_file(page); unlock_page(page); page_cache_release(page); -- cgit v1.2.3 From 8321fec436050b586cee448f2da0a6999e5172dd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 06:22:32 -0700 Subject: cifs: convert async read code to use pages array without kmapping Replace the "marshal_iov" function with a "read_into_pages" function. That function will copy the read data off the socket and into the pages array, kmapping and reading pages one at a time. Signed-off-by: Jeff Layton --- fs/cifs/cifsglob.h | 7 +++- fs/cifs/cifssmb.c | 27 +++++------- fs/cifs/file.c | 121 ++++++++++++++++++++++++++--------------------------- fs/cifs/smb2pdu.c | 6 ++- 4 files changed, 80 insertions(+), 81 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b70863ebedf2..93e16200b2e8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -978,8 +978,11 @@ struct cifs_readdata { pid_t pid; int result; struct work_struct work; - int (*marshal_iov) (struct cifs_readdata *rdata, - unsigned int remaining); + int (*read_into_pages)(struct TCP_Server_Info *server, + struct cifs_readdata *rdata, + unsigned int len); + unsigned int pagesz; + unsigned int tailsz; unsigned int nr_iov; struct kvec *iov; unsigned int nr_pages; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a110e0784221..5d7bd757dcf1 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1496,6 +1496,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* set up first iov for signature check */ rdata->iov[0].iov_base = buf; rdata->iov[0].iov_len = server->total_read; + rdata->nr_iov = 1; cFYI(1, "0: iov_base=%p iov_len=%zu", rdata->iov[0].iov_base, rdata->iov[0].iov_len); @@ -1507,23 +1508,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return cifs_readv_discard(server, mid); } - /* marshal up the page array */ - cifs_kmap_lock(); - len = rdata->marshal_iov(rdata, data_len); - cifs_kmap_unlock(); - data_len -= len; - - /* issue the read if we have any iovecs left to fill */ - if (rdata->nr_iov > 1) { - length = cifs_readv_from_socket(server, &rdata->iov[1], - rdata->nr_iov - 1, len); - if (length < 0) - return length; - server->total_read += length; - } else { - length = 0; - } + length = rdata->read_into_pages(server, rdata, data_len); + if (length < 0) + return length; + server->total_read += length; rdata->bytes = length; cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read, @@ -1544,7 +1533,11 @@ cifs_readv_callback(struct mid_q_entry *mid) struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; struct smb_rqst rqst = { .rq_iov = rdata->iov, - .rq_nvec = rdata->nr_iov }; + .rq_nvec = rdata->nr_iov, + .rq_pages = rdata->pages, + .rq_npages = rdata->nr_pages, + .rq_pagesz = rdata->pagesz, + .rq_tailsz = rdata->tailsz }; cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, mid->mid, mid->mid_state, rdata->result, rdata->bytes); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6eaf48270c97..f3f1b1098a6c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2566,63 +2566,57 @@ cifs_uncached_readv_complete(struct work_struct *work) { struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); - unsigned int i; - - /* if the result is non-zero then the pages weren't kmapped */ - if (rdata->result == 0) { - for (i = 0; i < rdata->nr_pages; i++) - kunmap(rdata->pages[i]); - } complete(&rdata->done); kref_put(&rdata->refcount, cifs_uncached_readdata_release); } static int -cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata, - unsigned int remaining) +cifs_uncached_read_into_pages(struct TCP_Server_Info *server, + struct cifs_readdata *rdata, unsigned int len) { - int len = 0; + int total_read = 0, result = 0; unsigned int i; unsigned int nr_pages = rdata->nr_pages; + struct kvec iov; - rdata->nr_iov = 1; + rdata->tailsz = PAGE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; - if (remaining >= PAGE_SIZE) { + if (len >= PAGE_SIZE) { /* enough data to fill the page */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE; - cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - ++rdata->nr_iov; - len += PAGE_SIZE; - remaining -= PAGE_SIZE; - } else if (remaining > 0) { + iov.iov_base = kmap(page); + iov.iov_len = PAGE_SIZE; + cFYI(1, "%u: iov_base=%p iov_len=%zu", + i, iov.iov_base, iov.iov_len); + len -= PAGE_SIZE; + } else if (len > 0) { /* enough for partial page, fill and zero the rest */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = remaining; - cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - memset(rdata->iov[rdata->nr_iov].iov_base + remaining, - '\0', PAGE_SIZE - remaining); - ++rdata->nr_iov; - len += remaining; - remaining = 0; + iov.iov_base = kmap(page); + iov.iov_len = len; + cFYI(1, "%u: iov_base=%p iov_len=%zu", + i, iov.iov_base, iov.iov_len); + memset(iov.iov_base + len, '\0', PAGE_SIZE - len); + rdata->tailsz = len; + len = 0; } else { /* no need to hold page hostage */ rdata->pages[i] = NULL; rdata->nr_pages--; put_page(page); + continue; } + + result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len); + kunmap(page); + if (result < 0) + break; + + total_read += result; } - return len; + return total_read > 0 ? total_read : result; } static ssize_t @@ -2685,7 +2679,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, rdata->offset = offset; rdata->bytes = cur_len; rdata->pid = pid; - rdata->marshal_iov = cifs_uncached_read_marshal_iov; + rdata->pagesz = PAGE_SIZE; + rdata->read_into_pages = cifs_uncached_read_into_pages; rc = cifs_retry_async_readv(rdata); error: @@ -2935,7 +2930,6 @@ cifs_readv_complete(struct work_struct *work) lru_cache_add_file(page); if (rdata->result == 0) { - kunmap(page); flush_dcache_page(page); SetPageUptodate(page); } @@ -2952,47 +2946,42 @@ cifs_readv_complete(struct work_struct *work) } static int -cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) +cifs_readpages_read_into_pages(struct TCP_Server_Info *server, + struct cifs_readdata *rdata, unsigned int len) { - int len = 0; + int total_read = 0, result = 0; unsigned int i; u64 eof; pgoff_t eof_index; unsigned int nr_pages = rdata->nr_pages; + struct kvec iov; /* determine the eof that the server (probably) has */ eof = CIFS_I(rdata->mapping->host)->server_eof; eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); - rdata->nr_iov = 1; + rdata->tailsz = PAGE_CACHE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; - if (remaining >= PAGE_CACHE_SIZE) { + if (len >= PAGE_CACHE_SIZE) { /* enough data to fill the page */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE; + iov.iov_base = kmap(page); + iov.iov_len = PAGE_CACHE_SIZE; cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - ++rdata->nr_iov; - len += PAGE_CACHE_SIZE; - remaining -= PAGE_CACHE_SIZE; - } else if (remaining > 0) { + i, page->index, iov.iov_base, iov.iov_len); + len -= PAGE_CACHE_SIZE; + } else if (len > 0) { /* enough for partial page, fill and zero the rest */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = remaining; + iov.iov_base = kmap(page); + iov.iov_len = len; cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - memset(rdata->iov[rdata->nr_iov].iov_base + remaining, - '\0', PAGE_CACHE_SIZE - remaining); - ++rdata->nr_iov; - len += remaining; - remaining = 0; + i, page->index, iov.iov_base, iov.iov_len); + memset(iov.iov_base + len, + '\0', PAGE_CACHE_SIZE - len); + rdata->tailsz = len; + len = 0; } else if (page->index > eof_index) { /* * The VFS will not try to do readahead past the @@ -3010,6 +2999,7 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) page_cache_release(page); rdata->pages[i] = NULL; rdata->nr_pages--; + continue; } else { /* no need to hold page hostage */ lru_cache_add_file(page); @@ -3017,10 +3007,18 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) page_cache_release(page); rdata->pages[i] = NULL; rdata->nr_pages--; + continue; } + + result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len); + kunmap(page); + if (result < 0) + break; + + total_read += result; } - return len; + return total_read > 0 ? total_read : result; } static int cifs_readpages(struct file *file, struct address_space *mapping, @@ -3144,7 +3142,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata->offset = offset; rdata->bytes = bytes; rdata->pid = pid; - rdata->marshal_iov = cifs_readpages_marshal_iov; + rdata->pagesz = PAGE_CACHE_SIZE; + rdata->read_into_pages = cifs_readpages_read_into_pages; list_for_each_entry_safe(page, tpage, &tmplist, lru) { list_del(&page->lru); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 68023d23702b..e3efa47cd6ec 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1300,7 +1300,11 @@ smb2_readv_callback(struct mid_q_entry *mid) struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base; unsigned int credits_received = 1; struct smb_rqst rqst = { .rq_iov = rdata->iov, - .rq_nvec = rdata->nr_iov }; + .rq_nvec = 1, + .rq_pages = rdata->pages, + .rq_npages = rdata->nr_pages, + .rq_pagesz = rdata->pagesz, + .rq_tailsz = rdata->tailsz }; cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, mid->mid, mid->mid_state, rdata->result, rdata->bytes); -- cgit v1.2.3 From 5819575ec6b82345e1a21a960d381c699a91c700 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 06:22:34 -0700 Subject: cifs: replace kvec array in readdata with a single kvec The array is no longer needed. We just need a single kvec to hold the header for signature checking. Signed-off-by: Jeff Layton --- fs/cifs/cifsglob.h | 3 +-- fs/cifs/cifssmb.c | 29 ++++++++++++++--------------- fs/cifs/file.c | 9 --------- fs/cifs/smb2pdu.c | 12 ++++++------ 4 files changed, 21 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 93e16200b2e8..79e8b6f06021 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -981,10 +981,9 @@ struct cifs_readdata { int (*read_into_pages)(struct TCP_Server_Info *server, struct cifs_readdata *rdata, unsigned int len); + struct kvec iov; unsigned int pagesz; unsigned int tailsz; - unsigned int nr_iov; - struct kvec *iov; unsigned int nr_pages; struct page *pages[]; }; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5d7bd757dcf1..88bbb3ef95b3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1436,10 +1436,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) len = min_t(unsigned int, buflen, server->vals->read_rsp_size) - HEADER_SIZE(server) + 1; - rdata->iov[0].iov_base = buf + HEADER_SIZE(server) - 1; - rdata->iov[0].iov_len = len; + rdata->iov.iov_base = buf + HEADER_SIZE(server) - 1; + rdata->iov.iov_len = len; - length = cifs_readv_from_socket(server, rdata->iov, 1, len); + length = cifs_readv_from_socket(server, &rdata->iov, 1, len); if (length < 0) return length; server->total_read += length; @@ -1485,20 +1485,19 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) len = data_offset - server->total_read; if (len > 0) { /* read any junk before data into the rest of smallbuf */ - rdata->iov[0].iov_base = buf + server->total_read; - rdata->iov[0].iov_len = len; - length = cifs_readv_from_socket(server, rdata->iov, 1, len); + rdata->iov.iov_base = buf + server->total_read; + rdata->iov.iov_len = len; + length = cifs_readv_from_socket(server, &rdata->iov, 1, len); if (length < 0) return length; server->total_read += length; } /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = server->total_read; - rdata->nr_iov = 1; + rdata->iov.iov_base = buf; + rdata->iov.iov_len = server->total_read; cFYI(1, "0: iov_base=%p iov_len=%zu", - rdata->iov[0].iov_base, rdata->iov[0].iov_len); + rdata->iov.iov_base, rdata->iov.iov_len); /* how much data is in the response? */ data_len = server->ops->read_data_length(buf); @@ -1532,8 +1531,8 @@ cifs_readv_callback(struct mid_q_entry *mid) struct cifs_readdata *rdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - struct smb_rqst rqst = { .rq_iov = rdata->iov, - .rq_nvec = rdata->nr_iov, + struct smb_rqst rqst = { .rq_iov = &rdata->iov, + .rq_nvec = 1, .rq_pages = rdata->pages, .rq_npages = rdata->nr_pages, .rq_pagesz = rdata->pagesz, @@ -1580,7 +1579,7 @@ cifs_async_readv(struct cifs_readdata *rdata) READ_REQ *smb = NULL; int wct; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); - struct smb_rqst rqst = { .rq_iov = rdata->iov, + struct smb_rqst rqst = { .rq_iov = &rdata->iov, .rq_nvec = 1 }; cFYI(1, "%s: offset=%llu bytes=%u", __func__, @@ -1621,8 +1620,8 @@ cifs_async_readv(struct cifs_readdata *rdata) } /* 4 for RFC1001 length + 1 for BCC */ - rdata->iov[0].iov_base = smb; - rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; + rdata->iov.iov_base = smb; + rdata->iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; kref_get(&rdata->refcount); rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index f3f1b1098a6c..2421ec28df14 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2413,11 +2413,6 @@ static struct cifs_readdata * cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete) { struct cifs_readdata *rdata; - struct kvec *iov; - - iov = kzalloc(sizeof(*iov) * (nr_pages + 1), GFP_KERNEL); - if (!iov) - return (struct cifs_readdata *)iov; rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages), GFP_KERNEL); @@ -2426,9 +2421,6 @@ cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete) INIT_LIST_HEAD(&rdata->list); init_completion(&rdata->done); INIT_WORK(&rdata->work, complete); - rdata->iov = iov; - } else { - kfree(iov); } return rdata; @@ -2443,7 +2435,6 @@ cifs_readdata_release(struct kref *refcount) if (rdata->cfile) cifsFileInfo_put(rdata->cfile); - kfree(rdata->iov); kfree(rdata); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e3efa47cd6ec..1b447612200e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1297,9 +1297,9 @@ smb2_readv_callback(struct mid_q_entry *mid) struct cifs_readdata *rdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base; + struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov.iov_base; unsigned int credits_received = 1; - struct smb_rqst rqst = { .rq_iov = rdata->iov, + struct smb_rqst rqst = { .rq_iov = &rdata->iov, .rq_nvec = 1, .rq_pages = rdata->pages, .rq_npages = rdata->nr_pages, @@ -1350,7 +1350,7 @@ smb2_async_readv(struct cifs_readdata *rdata) int rc; struct smb2_hdr *buf; struct cifs_io_parms io_parms; - struct smb_rqst rqst = { .rq_iov = rdata->iov, + struct smb_rqst rqst = { .rq_iov = &rdata->iov, .rq_nvec = 1 }; cFYI(1, "%s: offset=%llu bytes=%u", __func__, @@ -1362,13 +1362,13 @@ smb2_async_readv(struct cifs_readdata *rdata) io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; io_parms.pid = rdata->pid; - rc = smb2_new_read_req(&rdata->iov[0], &io_parms, 0, 0); + rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0); if (rc) return rc; - buf = (struct smb2_hdr *)rdata->iov[0].iov_base; + buf = (struct smb2_hdr *)rdata->iov.iov_base; /* 4 for rfc1002 length field */ - rdata->iov[0].iov_len = get_rfc1002_length(rdata->iov[0].iov_base) + 4; + rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4; kref_get(&rdata->refcount); rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, -- cgit v1.2.3 From 71953fc6e4ce5ac05b594d8e5866accf531aa969 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 06:22:42 -0700 Subject: cifs: remove kmap lock and rsize limit Now that we aren't abusing the kmap address space, there's no need for this lock or to impose a limit on the rsize. Signed-off-by: Jeff Layton --- fs/cifs/cifsfs.c | 4 ---- fs/cifs/cifsglob.h | 39 --------------------------------------- fs/cifs/smb1ops.c | 3 --- fs/cifs/smb2ops.c | 3 --- 4 files changed, 49 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 4dda4890d776..3a3e2fee0b3e 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -88,10 +88,6 @@ extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; -#ifdef CONFIG_HIGHMEM -DEFINE_MUTEX(cifs_kmap_mutex); -#endif - static int cifs_read_super(struct super_block *sb) { diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 79e8b6f06021..b2bb941d8ddd 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -637,45 +637,6 @@ get_next_mid(struct TCP_Server_Info *server) #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) -/* - * On hosts with high memory, we can't currently support wsize/rsize that are - * larger than we can kmap at once. Cap the rsize/wsize at - * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request - * larger than that anyway. - */ -#ifdef CONFIG_HIGHMEM -#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) -#else /* CONFIG_HIGHMEM */ -#define CIFS_KMAP_SIZE_LIMIT (1<<24) -#endif /* CONFIG_HIGHMEM */ - -#ifdef CONFIG_HIGHMEM -/* - * On arches that have high memory, kmap address space is limited. By - * serializing the kmap operations on those arches, we ensure that we don't - * end up with a bunch of threads in writeback with partially mapped page - * arrays, stuck waiting for kmap to come back. That situation prevents - * progress and can deadlock. - */ - -extern struct mutex cifs_kmap_mutex; - -static inline void -cifs_kmap_lock(void) -{ - mutex_lock(&cifs_kmap_mutex); -} - -static inline void -cifs_kmap_unlock(void) -{ - mutex_unlock(&cifs_kmap_mutex); -} -#else /* !CONFIG_HIGHMEM */ -#define cifs_kmap_lock() do { ; } while (0) -#define cifs_kmap_unlock() do { ; } while (0) -#endif /* CONFIG_HIGHMEM */ - /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 8727ef712a3c..ed7f95532383 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -483,9 +483,6 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) if (!(server->capabilities & CAP_LARGE_READ_X)) rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); - /* limit to the amount that we can kmap at once */ - rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); - /* hard limit of CIFS_MAX_RSIZE */ rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a621d6125367..b1dedf8cb372 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -205,9 +205,6 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) */ rsize = min_t(unsigned int, rsize, 2 << 15); - /* limit to the amount that we can kmap at once */ - rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); - return rsize; } -- cgit v1.2.3 From 1c0bd60b560cdf63a263f8ff3cebe9f99fe7a47c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:43 -0700 Subject: CIFS: Add NTLMSSP sec type to defaults to let us negotiate SMB2 without specifying sec type explicitly. Signed-off-by: Pavel Shilovsky --- fs/cifs/cifsglob.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b2bb941d8ddd..004672f9e16c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1328,7 +1328,7 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ -#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) +#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* -- cgit v1.2.3 From f45d34167c67b083b54690e349e77f59062ef0ea Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:43 -0700 Subject: CIFS: Remove spinlock dependence in brlock processing Now we need to lock/unlock a spinlock while processing brlock ops on the inode. Move brlocks of a fid to a separate list and attach all such lists to the inode. This let us not hold a spinlock. Signed-off-by: Pavel Shilovsky --- fs/cifs/cifsfs.c | 1 + fs/cifs/cifsglob.h | 18 +++++++------- fs/cifs/file.c | 73 ++++++++++++++++++++++++++++++++---------------------- 3 files changed, 53 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3a3e2fee0b3e..e958d9438505 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -233,6 +233,7 @@ cifs_alloc_inode(struct super_block *sb) to zero by the VFS */ /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/ INIT_LIST_HEAD(&cifs_inode->openFileList); + INIT_LIST_HEAD(&cifs_inode->llist); return &cifs_inode->vfs_inode; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 004672f9e16c..b2eb577b5f3f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -890,13 +890,16 @@ struct cifs_fid { #endif }; +struct cifs_fid_locks { + struct list_head llist; + struct cifsFileInfo *cfile; /* fid that owns locks */ + struct list_head locks; /* locks held by fid above */ +}; + struct cifsFileInfo { struct list_head tlist; /* pointer to next fid owned by tcon */ struct list_head flist; /* next fid (file instance) for this inode */ - struct list_head llist; /* - * brlocks held by this fid, protected by - * lock_mutex from cifsInodeInfo structure - */ + struct cifs_fid_locks *llist; /* brlocks held by this fid */ unsigned int uid; /* allows finding which FileInfo structure */ __u32 pid; /* process id who opened file */ struct cifs_fid fid; /* file id from remote */ @@ -988,11 +991,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); struct cifsInodeInfo { bool can_cache_brlcks; - struct mutex lock_mutex; /* - * protect the field above and llist - * from every cifsFileInfo structure - * from openFileList - */ + struct list_head llist; /* locks helb by this inode */ + struct mutex lock_mutex; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2421ec28df14..63963730cb28 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -245,11 +245,25 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct inode *inode = dentry->d_inode; struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifsFileInfo *cfile; + struct cifs_fid_locks *fdlocks; cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); if (cfile == NULL) return cfile; + fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); + if (!fdlocks) { + kfree(cfile); + return NULL; + } + + INIT_LIST_HEAD(&fdlocks->locks); + fdlocks->cfile = cfile; + cfile->llist = fdlocks; + mutex_lock(&cinode->lock_mutex); + list_add(&fdlocks->llist, &cinode->llist); + mutex_unlock(&cinode->lock_mutex); + cfile->count = 1; cfile->pid = current->tgid; cfile->uid = current_fsuid(); @@ -257,9 +271,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->f_flags = file->f_flags; cfile->invalidHandle = false; cfile->tlink = cifs_get_tlink(tlink); - mutex_init(&cfile->fh_mutex); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); - INIT_LIST_HEAD(&cfile->llist); + mutex_init(&cfile->fh_mutex); tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); spin_lock(&cifs_file_list_lock); @@ -336,15 +349,18 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) free_xid(xid); } - /* Delete any outstanding lock records. We'll lose them when the file + /* + * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. */ mutex_lock(&cifsi->lock_mutex); - list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) { + list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { list_del(&li->llist); cifs_del_lock_waiters(li); kfree(li); } + list_del(&cifs_file->llist->llist); + kfree(cifs_file->llist); mutex_unlock(&cifsi->lock_mutex); cifs_put_tlink(cifs_file->tlink); @@ -691,25 +707,24 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock) } static bool -cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, - __u64 length, __u8 type, struct cifsFileInfo *cur, +cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, + __u64 length, __u8 type, struct cifsFileInfo *cfile, struct cifsLockInfo **conf_lock) { struct cifsLockInfo *li; + struct cifsFileInfo *cur_cfile = fdlocks->cfile; struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - list_for_each_entry(li, &cfile->llist, llist) { + list_for_each_entry(li, &fdlocks->locks, llist) { if (offset + length <= li->offset || offset >= li->offset + li->length) continue; - else if ((type & server->vals->shared_lock_type) && - ((server->ops->compare_fids(cur, cfile) && - current->tgid == li->pid) || type == li->type)) + if ((type & server->vals->shared_lock_type) && + ((server->ops->compare_fids(cfile, cur_cfile) && + current->tgid == li->pid) || type == li->type)) continue; - else { - *conf_lock = li; - return true; - } + *conf_lock = li; + return true; } return false; } @@ -719,17 +734,15 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, __u8 type, struct cifsLockInfo **conf_lock) { bool rc = false; - struct cifsFileInfo *fid, *tmp; + struct cifs_fid_locks *cur; struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); - spin_lock(&cifs_file_list_lock); - list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) { - rc = cifs_find_fid_lock_conflict(fid, offset, length, type, + list_for_each_entry(cur, &cinode->llist, llist) { + rc = cifs_find_fid_lock_conflict(cur, offset, length, type, cfile, conf_lock); if (rc) break; } - spin_unlock(&cifs_file_list_lock); return rc; } @@ -777,7 +790,7 @@ cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); mutex_lock(&cinode->lock_mutex); - list_add_tail(&lock->llist, &cfile->llist); + list_add_tail(&lock->llist, &cfile->llist->locks); mutex_unlock(&cinode->lock_mutex); } @@ -803,7 +816,7 @@ try_again: exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, lock->type, &conf_lock); if (!exist && cinode->can_cache_brlcks) { - list_add_tail(&lock->llist, &cfile->llist); + list_add_tail(&lock->llist, &cfile->llist->locks); mutex_unlock(&cinode->lock_mutex); return rc; } @@ -937,7 +950,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) for (i = 0; i < 2; i++) { cur = buf; num = 0; - list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (li->type != types[i]) continue; cur->Pid = cpu_to_le16(li->pid); @@ -1279,7 +1292,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, for (i = 0; i < 2; i++) { cur = buf; num = 0; - list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (flock->fl_start > li->offset || (flock->fl_start + length) < (li->offset + li->length)) @@ -1320,7 +1333,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, * list to the head of the file's list. */ cifs_move_llist(&tmp_llist, - &cfile->llist); + &cfile->llist->locks); rc = stored_rc; } else /* @@ -1337,7 +1350,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, types[i], num, 0, buf); if (stored_rc) { - cifs_move_llist(&tmp_llist, &cfile->llist); + cifs_move_llist(&tmp_llist, + &cfile->llist->locks); rc = stored_rc; } else cifs_free_llist(&tmp_llist); @@ -1350,7 +1364,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, } static int -cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, +cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, bool wait_flag, bool posix_lck, int lock, int unlock, unsigned int xid) { @@ -1359,7 +1373,6 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - __u16 netfid = cfile->fid.netfid; if (posix_lck) { int posix_lock_type; @@ -1376,9 +1389,9 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, if (unlock == 1) posix_lock_type = CIFS_UNLCK; - rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, - flock->fl_start, length, NULL, - posix_lock_type, wait_flag); + rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, + current->tgid, flock->fl_start, length, + NULL, posix_lock_type, wait_flag); goto out; } -- cgit v1.2.3 From d39a4f710b7a7be05b6ed9d4ab8fac754c139f8a Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:43 -0700 Subject: CIFS: Move brlock code to ops struct Signed-off-by: Pavel Shilovsky --- fs/cifs/cifsglob.h | 8 ++++++++ fs/cifs/cifsproto.h | 3 +++ fs/cifs/file.c | 42 +++++++++++++++++------------------------- fs/cifs/smb1ops.c | 12 ++++++++++++ 4 files changed, 40 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b2eb577b5f3f..d3c72713fec4 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -353,6 +353,14 @@ struct smb_version_operations { /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, struct kstatfs *); + /* send mandatory brlock to the server */ + int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, + __u64, __u32, int, int, bool); + /* unlock range of mandatory locks */ + int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *, + const unsigned int); + /* push brlocks from the cache to the server */ + int (*push_mand_locks)(struct cifsFileInfo *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 3b628f2af258..a7e238f88898 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -124,6 +124,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset); extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); +extern int cifs_unlock_range(struct cifsFileInfo *cfile, + struct file_lock *flock, const unsigned int xid); +extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 63963730cb28..90ab83647b82 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -903,7 +903,7 @@ try_again: return rc; } -static int +int cifs_push_mandatory_locks(struct cifsFileInfo *cfile) { unsigned int xid; @@ -1111,7 +1111,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) return cifs_push_posix_locks(cfile); - return cifs_push_mandatory_locks(cfile); + return tcon->ses->server->ops->push_mand_locks(cfile); } static void @@ -1161,15 +1161,6 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, cFYI(1, "Unknown type of lock"); } -static int -cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, - __u64 length, __u32 type, int lock, int unlock, bool wait) -{ - return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid, - current->tgid, length, offset, unlock, lock, - (__u8)type, wait, 0); -} - static int cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, bool wait_flag, bool posix_lck, unsigned int xid) @@ -1203,11 +1194,11 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, return rc; /* BB we could chain these into one lock request BB */ - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type, - 1, 0, false); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, + 1, 0, false); if (rc == 0) { - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type, 0, 1, false); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type, 0, 1, false); flock->fl_type = F_UNLCK; if (rc != 0) cERROR(1, "Error unlocking previously locked " @@ -1220,13 +1211,14 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, return 0; } - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type | server->vals->shared_lock_type, 1, 0, - false); + type &= ~server->vals->exclusive_lock_type; + + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type | server->vals->shared_lock_type, + 1, 0, false); if (rc == 0) { - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type | server->vals->shared_lock_type, - 0, 1, false); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type | server->vals->shared_lock_type, 0, 1, false); flock->fl_type = F_RDLCK; if (rc != 0) cERROR(1, "Error unlocking previously locked " @@ -1256,7 +1248,7 @@ cifs_free_llist(struct list_head *llist) } } -static int +int cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, unsigned int xid) { @@ -1408,8 +1400,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, if (rc <= 0) goto out; - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type, 1, 0, wait_flag); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type, 1, 0, wait_flag); if (rc) { kfree(lock); goto out; @@ -1417,7 +1409,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, cifs_lock_add(cfile, lock); } else if (unlock) - rc = cifs_unlock_range(cfile, flock, xid); + rc = server->ops->mand_unlock_range(cfile, flock, xid); out: if (flock->fl_flags & FL_POSIX) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index ed7f95532383..5fb0fe5f72b6 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -899,6 +899,15 @@ cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +static int +cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, + __u64 length, __u32 type, int lock, int unlock, bool wait) +{ + return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid, + current->tgid, length, offset, unlock, lock, + (__u8)type, wait, 0); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -960,6 +969,9 @@ struct smb_version_operations smb1_operations = { .calc_smb_size = smbCalcSize, .oplock_response = cifs_oplock_response, .queryfs = cifs_queryfs, + .mand_lock = cifs_mand_lock, + .mand_unlock_range = cifs_unlock_range, + .push_mand_locks = cifs_push_mandatory_locks, }; struct smb_version_values smb1_values = { -- cgit v1.2.3 From 027e8eec31d8141a6231f772e10ccae60c9d5c13 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:43 -0700 Subject: CIFS: Handle SMB2 lock flags Signed-off-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 12 ++++++++++++ fs/cifs/smb2pdu.h | 5 +++++ 2 files changed, 17 insertions(+) (limited to 'fs') diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b1dedf8cb372..e4a59d1f06b1 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -537,7 +537,15 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +static bool +smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) +{ + return ob1->fid.persistent_fid == ob2->fid.persistent_fid && + ob1->fid.volatile_fid == ob2->fid.volatile_fid; +} + struct smb_version_operations smb21_operations = { + .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, .check_receive = smb2_check_receive, @@ -598,6 +606,10 @@ struct smb_version_operations smb21_operations = { struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 3c8e99ea88ad..d2d132e94155 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -526,6 +526,11 @@ struct smb2_write_rsp { __u8 Buffer[1]; } __packed; +#define SMB2_LOCKFLAG_SHARED_LOCK 0x0001 +#define SMB2_LOCKFLAG_EXCLUSIVE_LOCK 0x0002 +#define SMB2_LOCKFLAG_UNLOCK 0x0004 +#define SMB2_LOCKFLAG_FAIL_IMMEDIATELY 0x0010 + struct smb2_echo_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ -- cgit v1.2.3 From f7ba7fe685bc3ed8fd0687870e68b2567d17357f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:43 -0700 Subject: CIFS: Add brlock support for SMB2 Signed-off-by: Pavel Shilovsky --- fs/cifs/cifsproto.h | 4 +++ fs/cifs/file.c | 8 ++--- fs/cifs/smb2file.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 13 +++++++ fs/cifs/smb2pdu.c | 59 ++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 24 +++++++++++++ fs/cifs/smb2proto.h | 10 ++++++ 7 files changed, 210 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a7e238f88898..15e38dc389fc 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -190,6 +190,10 @@ extern void cifs_dfs_release_automount_timer(void); void cifs_proc_init(void); void cifs_proc_clean(void); +extern void cifs_move_llist(struct list_head *source, struct list_head *dest); +extern void cifs_free_llist(struct list_head *llist); +extern void cifs_del_lock_waiters(struct cifsLockInfo *lock); + extern int cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses); extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 90ab83647b82..e2a8e4456275 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -288,8 +288,6 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, return cfile; } -static void cifs_del_lock_waiters(struct cifsLockInfo *lock); - struct cifsFileInfo * cifsFileInfo_get(struct cifsFileInfo *cifs_file) { @@ -696,7 +694,7 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type) return lock; } -static void +void cifs_del_lock_waiters(struct cifsLockInfo *lock) { struct cifsLockInfo *li, *tmp; @@ -1229,7 +1227,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, return 0; } -static void +void cifs_move_llist(struct list_head *source, struct list_head *dest) { struct list_head *li, *tmp; @@ -1237,7 +1235,7 @@ cifs_move_llist(struct list_head *source, struct list_head *dest) list_move(li, dest); } -static void +void cifs_free_llist(struct list_head *llist) { struct cifsLockInfo *li, *tmp; diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 5ff25e025215..a25ea02149e7 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -104,3 +104,100 @@ out: kfree(smb2_path); return rc; } + +int +smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, + const unsigned int xid) +{ + int rc = 0, stored_rc; + unsigned int max_num, num = 0, max_buf; + struct smb2_lock_element *buf, *cur; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsLockInfo *li, *tmp; + __u64 length = 1 + flock->fl_end - flock->fl_start; + struct list_head tmp_llist; + + INIT_LIST_HEAD(&tmp_llist); + + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tcon->ses->server->maxBuf; + if (!max_buf) + return -EINVAL; + + max_num = max_buf / sizeof(struct smb2_lock_element); + buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + cur = buf; + + mutex_lock(&cinode->lock_mutex); + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { + if (flock->fl_start > li->offset || + (flock->fl_start + length) < + (li->offset + li->length)) + continue; + if (current->tgid != li->pid) + continue; + if (cinode->can_cache_brlcks) { + /* + * We can cache brlock requests - simply remove a lock + * from the file's list. + */ + list_del(&li->llist); + cifs_del_lock_waiters(li); + kfree(li); + continue; + } + cur->Length = cpu_to_le64(li->length); + cur->Offset = cpu_to_le64(li->offset); + cur->Flags = cpu_to_le32(SMB2_LOCKFLAG_UNLOCK); + /* + * We need to save a lock here to let us add it again to the + * file's list if the unlock range request fails on the server. + */ + list_move(&li->llist, &tmp_llist); + if (++num == max_num) { + stored_rc = smb2_lockv(xid, tcon, + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, num, buf); + if (stored_rc) { + /* + * We failed on the unlock range request - add + * all locks from the tmp list to the head of + * the file's list. + */ + cifs_move_llist(&tmp_llist, + &cfile->llist->locks); + rc = stored_rc; + } else + /* + * The unlock range request succeed - free the + * tmp list. + */ + cifs_free_llist(&tmp_llist); + cur = buf; + num = 0; + } else + cur++; + } + if (num) { + stored_rc = smb2_lockv(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, current->tgid, + num, buf); + if (stored_rc) { + cifs_move_llist(&tmp_llist, &cfile->llist->locks); + rc = stored_rc; + } else + cifs_free_llist(&tmp_llist); + } + mutex_unlock(&cinode->lock_mutex); + + kfree(buf); + return rc; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e4a59d1f06b1..caed2c57896d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -544,6 +544,17 @@ smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) ob1->fid.volatile_fid == ob2->fid.volatile_fid; } +static int +smb2_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, + __u64 length, __u32 type, int lock, int unlock, bool wait) +{ + if (unlock && !lock) + type = SMB2_LOCKFLAG_UNLOCK; + return SMB2_lock(xid, tlink_tcon(cfile->tlink), + cfile->fid.persistent_fid, cfile->fid.volatile_fid, + current->tgid, length, offset, type, wait); +} + struct smb_version_operations smb21_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -602,6 +613,8 @@ struct smb_version_operations smb21_operations = { .is_status_pending = smb2_is_status_pending, .oplock_response = smb2_oplock_response, .queryfs = smb2_queryfs, + .mand_lock = smb2_mand_lock, + .mand_unlock_range = smb2_unlock_range, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1b447612200e..d3e1cfca3379 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2047,3 +2047,62 @@ qinf_exit: free_rsp_buf(resp_buftype, iov.iov_base); return rc; } + +int +smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, + const __u32 num_lock, struct smb2_lock_element *buf) +{ + int rc = 0; + struct smb2_lock_req *req = NULL; + struct kvec iov[2]; + int resp_buf_type; + unsigned int count; + + cFYI(1, "smb2_lockv num lock %d", num_lock); + + rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req); + if (rc) + return rc; + + req->hdr.ProcessId = cpu_to_le32(pid); + req->LockCount = cpu_to_le16(num_lock); + + req->PersistentFileId = persist_fid; + req->VolatileFileId = volatile_fid; + + count = num_lock * sizeof(struct smb2_lock_element); + inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element)); + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field and count for all locks */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - count; + iov[1].iov_base = (char *)buf; + iov[1].iov_len = count; + + cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); + rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP); + if (rc) { + cFYI(1, "Send error in smb2_lockv = %d", rc); + cifs_stats_fail_inc(tcon, SMB2_LOCK_HE); + } + + return rc; +} + +int +SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, + const __u64 length, const __u64 offset, const __u32 lock_flags, + const bool wait) +{ + struct smb2_lock_element lock; + + lock.Offset = cpu_to_le64(offset); + lock.Length = cpu_to_le64(length); + lock.Flags = cpu_to_le32(lock_flags); + if (!wait && lock_flags != SMB2_LOCKFLAG_UNLOCK) + lock.Flags |= cpu_to_le32(SMB2_LOCKFLAG_FAIL_IMMEDIATELY); + + return smb2_lockv(xid, tcon, persist_fid, volatile_fid, pid, 1, &lock); +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index d2d132e94155..889ee5e193d9 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -531,6 +531,30 @@ struct smb2_write_rsp { #define SMB2_LOCKFLAG_UNLOCK 0x0004 #define SMB2_LOCKFLAG_FAIL_IMMEDIATELY 0x0010 +struct smb2_lock_element { + __le64 Offset; + __le64 Length; + __le32 Flags; + __le32 Reserved; +} __packed; + +struct smb2_lock_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 48 */ + __le16 LockCount; + __le32 Reserved; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + /* Followed by at least one */ + struct smb2_lock_element locks[1]; +} __packed; + +struct smb2_lock_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + struct smb2_echo_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index aeb30dbdf8b8..ab19152b092b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -84,6 +84,8 @@ extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb); extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); +extern int smb2_unlock_range(struct cifsFileInfo *cfile, + struct file_lock *flock, const unsigned int xid); /* * SMB2 Worker functions - most of protocol specific implementation details @@ -140,5 +142,13 @@ extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct kstatfs *FSData); +extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, + const __u32 pid, const __u64 length, const __u64 offset, + const __u32 lockFlags, const bool wait); +extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, + const __u32 pid, const __u32 num_lock, + struct smb2_lock_element *buf); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From b140799a11adb6023d5f96712874c37b71dab290 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:44 -0700 Subject: CIFS: Use brlock cache for SMB2 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2file.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 3 +- fs/cifs/smb2proto.h | 1 + 3 files changed, 94 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index a25ea02149e7..181e13d9f9db 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -201,3 +201,94 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, kfree(buf); return rc; } + +static int +smb2_push_mand_fdlocks(struct cifs_fid_locks *fdlocks, const unsigned int xid, + struct smb2_lock_element *buf, unsigned int max_num) +{ + int rc = 0, stored_rc; + struct cifsFileInfo *cfile = fdlocks->cfile; + struct cifsLockInfo *li; + unsigned int num = 0; + struct smb2_lock_element *cur = buf; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + + list_for_each_entry(li, &fdlocks->locks, llist) { + cur->Length = cpu_to_le64(li->length); + cur->Offset = cpu_to_le64(li->offset); + cur->Flags = cpu_to_le32(li->type | + SMB2_LOCKFLAG_FAIL_IMMEDIATELY); + if (++num == max_num) { + stored_rc = smb2_lockv(xid, tcon, + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, num, buf); + if (stored_rc) + rc = stored_rc; + cur = buf; + num = 0; + } else + cur++; + } + if (num) { + stored_rc = smb2_lockv(xid, tcon, + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, num, buf); + if (stored_rc) + rc = stored_rc; + } + + return rc; +} + +int +smb2_push_mandatory_locks(struct cifsFileInfo *cfile) +{ + int rc = 0, stored_rc; + unsigned int xid; + unsigned int max_num, max_buf; + struct smb2_lock_element *buf; + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifs_fid_locks *fdlocks; + + xid = get_xid(); + mutex_lock(&cinode->lock_mutex); + if (!cinode->can_cache_brlcks) { + mutex_unlock(&cinode->lock_mutex); + free_xid(xid); + return rc; + } + + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; + if (!max_buf) { + mutex_unlock(&cinode->lock_mutex); + free_xid(xid); + return -EINVAL; + } + + max_num = max_buf / sizeof(struct smb2_lock_element); + buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); + if (!buf) { + mutex_unlock(&cinode->lock_mutex); + free_xid(xid); + return -ENOMEM; + } + + list_for_each_entry(fdlocks, &cinode->llist, llist) { + stored_rc = smb2_push_mand_fdlocks(fdlocks, xid, buf, max_num); + if (stored_rc) + rc = stored_rc; + } + + cinode->can_cache_brlcks = false; + kfree(buf); + + mutex_unlock(&cinode->lock_mutex); + free_xid(xid); + return rc; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index caed2c57896d..0808b238219b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -371,7 +371,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; smb2_set_oplock_level(cinode, oplock); - /* cinode->can_cache_brlcks = cinode->clientCanCacheAll; */ + cinode->can_cache_brlcks = cinode->clientCanCacheAll; } static int @@ -615,6 +615,7 @@ struct smb_version_operations smb21_operations = { .queryfs = smb2_queryfs, .mand_lock = smb2_mand_lock, .mand_unlock_range = smb2_unlock_range, + .push_mand_locks = smb2_push_mandatory_locks, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index ab19152b092b..8b4d3712255b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -86,6 +86,7 @@ extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); extern int smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); +extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); /* * SMB2 Worker functions - most of protocol specific implementation details -- cgit v1.2.3 From 1b4b55a1d9404f51aacf1ff19887eb911484057d Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:44 -0700 Subject: CIFS: Turn lock mutex into rw semaphore and allow several processes to walk through the lock list and read can_cache_brlcks value if they are not going to modify them. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 +- fs/cifs/cifsglob.h | 2 +- fs/cifs/file.c | 60 ++++++++++++++++++++++++++++-------------------------- fs/cifs/smb2file.c | 15 +++++++------- 4 files changed, 41 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e958d9438505..4cd68c77ce39 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -937,7 +937,7 @@ cifs_init_once(void *inode) struct cifsInodeInfo *cifsi = inode; inode_init_once(&cifsi->vfs_inode); - mutex_init(&cifsi->lock_mutex); + init_rwsem(&cifsi->lock_sem); } static int diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d3c72713fec4..e2492e1cdb85 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1000,7 +1000,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); struct cifsInodeInfo { bool can_cache_brlcks; struct list_head llist; /* locks helb by this inode */ - struct mutex lock_mutex; /* protect the fields above */ + struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e2a8e4456275..1adff75dfffc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -260,9 +260,9 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_LIST_HEAD(&fdlocks->locks); fdlocks->cfile = cfile; cfile->llist = fdlocks; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); list_add(&fdlocks->llist, &cinode->llist); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); cfile->count = 1; cfile->pid = current->tgid; @@ -351,7 +351,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. */ - mutex_lock(&cifsi->lock_mutex); + down_write(&cifsi->lock_sem); list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { list_del(&li->llist); cifs_del_lock_waiters(li); @@ -359,7 +359,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) } list_del(&cifs_file->llist->llist); kfree(cifs_file->llist); - mutex_unlock(&cifsi->lock_mutex); + up_write(&cifsi->lock_sem); cifs_put_tlink(cifs_file->tlink); dput(cifs_file->dentry); @@ -762,7 +762,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; bool exist; - mutex_lock(&cinode->lock_mutex); + down_read(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, offset, length, type, &conf_lock); @@ -779,7 +779,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, else flock->fl_type = F_UNLCK; - mutex_unlock(&cinode->lock_mutex); + up_read(&cinode->lock_sem); return rc; } @@ -787,9 +787,9 @@ static void cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); list_add_tail(&lock->llist, &cfile->llist->locks); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); } /* @@ -809,13 +809,13 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, try_again: exist = false; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, lock->type, &conf_lock); if (!exist && cinode->can_cache_brlcks) { list_add_tail(&lock->llist, &cfile->llist->locks); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); return rc; } @@ -825,17 +825,17 @@ try_again: rc = -EACCES; else { list_add_tail(&lock->blist, &conf_lock->blist); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); rc = wait_event_interruptible(lock->block_q, (lock->blist.prev == &lock->blist) && (lock->blist.next == &lock->blist)); if (!rc) goto try_again; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); list_del_init(&lock->blist); } - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); return rc; } @@ -856,7 +856,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) if ((flock->fl_flags & FL_POSIX) == 0) return 1; - mutex_lock(&cinode->lock_mutex); + down_read(&cinode->lock_sem); posix_test_lock(file, flock); if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { @@ -864,7 +864,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) rc = 1; } - mutex_unlock(&cinode->lock_mutex); + up_read(&cinode->lock_sem); return rc; } @@ -884,14 +884,14 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) return rc; try_again: - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); return rc; } rc = posix_lock_file(file, flock, NULL); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); if (rc == FILE_LOCK_DEFERRED) { rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); if (!rc) @@ -918,9 +918,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) xid = get_xid(); tcon = tlink_tcon(cfile->tlink); - mutex_lock(&cinode->lock_mutex); + /* we are going to update can_cache_brlcks here - need a write access */ + down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; } @@ -931,7 +932,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) */ max_buf = tcon->ses->server->maxBuf; if (!max_buf) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return -EINVAL; } @@ -940,7 +941,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return -ENOMEM; } @@ -978,7 +979,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) } cinode->can_cache_brlcks = false; - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); kfree(buf); free_xid(xid); @@ -1013,9 +1014,10 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) xid = get_xid(); - mutex_lock(&cinode->lock_mutex); + /* we are going to update can_cache_brlcks here - need a write access */ + down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; } @@ -1031,7 +1033,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) /* * Allocating count locks is enough because no FL_POSIX locks can be - * added to the list while we are holding cinode->lock_mutex that + * added to the list while we are holding cinode->lock_sem that * protects locking operations of this inode. */ for (; i < count; i++) { @@ -1086,7 +1088,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) out: cinode->can_cache_brlcks = false; - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; @@ -1278,7 +1280,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (!buf) return -ENOMEM; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); for (i = 0; i < 2; i++) { cur = buf; num = 0; @@ -1348,7 +1350,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, } } - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); kfree(buf); return rc; } diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 181e13d9f9db..0ddd617ffa1a 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -135,7 +135,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, cur = buf; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (flock->fl_start > li->offset || (flock->fl_start + length) < @@ -196,7 +196,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, } else cifs_free_llist(&tmp_llist); } - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); kfree(buf); return rc; @@ -253,9 +253,10 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) struct cifs_fid_locks *fdlocks; xid = get_xid(); - mutex_lock(&cinode->lock_mutex); + /* we are going to update can_cache_brlcks here - need a write access */ + down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; } @@ -266,7 +267,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) */ max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; if (!max_buf) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return -EINVAL; } @@ -274,7 +275,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) max_num = max_buf / sizeof(struct smb2_lock_element); buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return -ENOMEM; } @@ -288,7 +289,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) cinode->can_cache_brlcks = false; kfree(buf); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; } -- cgit v1.2.3 From 579f9053236c796d718162c37c72bb3bd32d008c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:44 -0700 Subject: CIFS: Check for mandatory brlocks on read/write Currently CIFS code accept read/write ops on mandatory locked area when two processes use the same file descriptor - it's wrong. Fix this by serializing io and brlock operations on the inode. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 4 ++ fs/cifs/file.c | 121 ++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 102 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 15e38dc389fc..c758ee7b0307 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -180,6 +180,10 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data, extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); extern void cifs_umount(struct cifs_sb_info *); extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); +extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, + __u64 length, __u8 type, + struct cifsLockInfo **conf_lock, + bool rw_check); #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1adff75dfffc..2e2e4f9aeb63 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -707,7 +707,7 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock) static bool cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, __u64 length, __u8 type, struct cifsFileInfo *cfile, - struct cifsLockInfo **conf_lock) + struct cifsLockInfo **conf_lock, bool rw_check) { struct cifsLockInfo *li; struct cifsFileInfo *cur_cfile = fdlocks->cfile; @@ -717,19 +717,24 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, if (offset + length <= li->offset || offset >= li->offset + li->length) continue; + if (rw_check && server->ops->compare_fids(cfile, cur_cfile) && + current->tgid == li->pid) + continue; if ((type & server->vals->shared_lock_type) && ((server->ops->compare_fids(cfile, cur_cfile) && current->tgid == li->pid) || type == li->type)) continue; - *conf_lock = li; + if (conf_lock) + *conf_lock = li; return true; } return false; } -static bool +bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, - __u8 type, struct cifsLockInfo **conf_lock) + __u8 type, struct cifsLockInfo **conf_lock, + bool rw_check) { bool rc = false; struct cifs_fid_locks *cur; @@ -737,7 +742,7 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, list_for_each_entry(cur, &cinode->llist, llist) { rc = cifs_find_fid_lock_conflict(cur, offset, length, type, - cfile, conf_lock); + cfile, conf_lock, rw_check); if (rc) break; } @@ -765,7 +770,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, down_read(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, offset, length, type, - &conf_lock); + &conf_lock, false); if (exist) { flock->fl_start = conf_lock->offset; flock->fl_end = conf_lock->offset + conf_lock->length - 1; @@ -812,7 +817,7 @@ try_again: down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, - lock->type, &conf_lock); + lock->type, &conf_lock, false); if (!exist && cinode->can_cache_brlcks) { list_add_tail(&lock->llist, &cfile->llist->locks); up_write(&cinode->lock_sem); @@ -2394,15 +2399,58 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, return written; } -ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t +cifs_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct inode *inode; + struct file *file = iocb->ki_filp; + struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; + struct inode *inode = file->f_mapping->host; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + ssize_t rc = -EACCES; - inode = iocb->ki_filp->f_path.dentry->d_inode; + BUG_ON(iocb->ki_pos != pos); - if (CIFS_I(inode)->clientCanCacheAll) - return generic_file_aio_write(iocb, iov, nr_segs, pos); + sb_start_write(inode->i_sb); + + /* + * We need to hold the sem to be sure nobody modifies lock list + * with a brlock that prevents writing. + */ + down_read(&cinode->lock_sem); + if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + server->vals->exclusive_lock_type, NULL, + true)) { + mutex_lock(&inode->i_mutex); + rc = __generic_file_aio_write(iocb, iov, nr_segs, + &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + } + + if (rc > 0 || rc == -EIOCBQUEUED) { + ssize_t err; + + err = generic_write_sync(file, pos, rc); + if (err < 0 && rc > 0) + rc = err; + } + + up_read(&cinode->lock_sem); + sb_end_write(inode->i_sb); + return rc; +} + +ssize_t +cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *cfile = (struct cifsFileInfo *) + iocb->ki_filp->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); /* * In strict cache mode we need to write the data to the server exactly @@ -2411,7 +2459,15 @@ ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, * not on the region from pos to ppos+len-1. */ - return cifs_user_writev(iocb, iov, nr_segs, pos); + if (!cinode->clientCanCacheAll) + return cifs_user_writev(iocb, iov, nr_segs, pos); + + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + return generic_file_aio_write(iocb, iov, nr_segs, pos); + + return cifs_writev(iocb, iov, nr_segs, pos); } static struct cifs_readdata * @@ -2746,15 +2802,17 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, return read; } -ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t +cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct inode *inode; - - inode = iocb->ki_filp->f_path.dentry->d_inode; - - if (CIFS_I(inode)->clientCanCacheRead) - return generic_file_aio_read(iocb, iov, nr_segs, pos); + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *cfile = (struct cifsFileInfo *) + iocb->ki_filp->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + int rc = -EACCES; /* * In strict cache mode we need to read from the server all the time @@ -2764,8 +2822,25 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, * on pages affected by this read but not on the region from pos to * pos+len-1. */ + if (!cinode->clientCanCacheRead) + return cifs_user_readv(iocb, iov, nr_segs, pos); - return cifs_user_readv(iocb, iov, nr_segs, pos); + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + return generic_file_aio_read(iocb, iov, nr_segs, pos); + + /* + * We need to hold the sem to be sure nobody modifies lock list + * with a brlock that prevents reading. + */ + down_read(&cinode->lock_sem); + if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + tcon->ses->server->vals->shared_lock_type, + NULL, true)) + rc = generic_file_aio_read(iocb, iov, nr_segs, pos); + up_read(&cinode->lock_sem); + return rc; } static ssize_t -- cgit v1.2.3 From b8c32dbb0deb287a5fcb78251e4eae6c7275760d Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:44 -0700 Subject: CIFS: Request SMB2.1 leases if server supports them and we need oplocks. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 28 ++++++++--- fs/cifs/cifsglob.h | 10 ++++ fs/cifs/dir.c | 13 ++++- fs/cifs/file.c | 21 +++++--- fs/cifs/smb2file.c | 9 +++- fs/cifs/smb2ops.c | 21 ++++++++ fs/cifs/smb2pdu.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++---- fs/cifs/smb2pdu.h | 43 ++++++++++++++++- 8 files changed, 256 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 4cd68c77ce39..28ac048d54ea 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "cifsfs.h" #include "cifspdu.h" @@ -88,6 +89,10 @@ extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +#ifdef CONFIG_CIFS_SMB2 +__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; +#endif + static int cifs_read_super(struct super_block *sb) { @@ -218,9 +223,10 @@ cifs_alloc_inode(struct super_block *sb) return NULL; cifs_inode->cifsAttrs = 0x20; /* default */ cifs_inode->time = 0; - /* Until the file is open and we have gotten oplock - info back from the server, can not assume caching of - file data or metadata */ + /* + * Until the file is open and we have gotten oplock info back from the + * server, can not assume caching of file data or metadata. + */ cifs_set_oplock_level(cifs_inode, 0); cifs_inode->delete_pending = false; cifs_inode->invalid_mapping = false; @@ -228,10 +234,14 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->server_eof = 0; cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; - - /* Can not set i_flags here - they get immediately overwritten - to zero by the VFS */ -/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/ +#ifdef CONFIG_CIFS_SMB2 + get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE); +#endif + /* + * Can not set i_flags here - they get immediately overwritten to zero + * by the VFS. + */ + /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */ INIT_LIST_HEAD(&cifs_inode->openFileList); INIT_LIST_HEAD(&cifs_inode->llist); return &cifs_inode->vfs_inode; @@ -1107,6 +1117,10 @@ init_cifs(void) spin_lock_init(&cifs_file_list_lock); spin_lock_init(&GlobalMid_Lock); +#ifdef CONFIG_CIFS_SMB2 + get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE); +#endif + if (cifs_max_pending < 2) { cifs_max_pending = 2; cFYI(1, "cifs_max_pending set to min of 2"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e2492e1cdb85..b6ec142028e8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -361,6 +361,12 @@ struct smb_version_operations { const unsigned int); /* push brlocks from the cache to the server */ int (*push_mand_locks)(struct cifsFileInfo *); + /* get lease key of the inode */ + void (*get_lease_key)(struct inode *, struct cifs_fid *fid); + /* set lease key of the inode */ + void (*set_lease_key)(struct inode *, struct cifs_fid *fid); + /* generate new lease key */ + void (*new_lease_key)(struct cifs_fid *fid); }; struct smb_version_values { @@ -895,6 +901,7 @@ struct cifs_fid { #ifdef CONFIG_CIFS_SMB2 __u64 persistent_fid; /* persist file id for smb2 */ __u64 volatile_fid; /* volatile file id for smb2 */ + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ #endif }; @@ -1012,6 +1019,9 @@ struct cifsInodeInfo { u64 server_eof; /* current file size on server -- protected by i_lock */ u64 uniqueid; /* server inode number */ u64 createtime; /* creation time on server */ +#ifdef CONFIG_CIFS_SMB2 + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */ +#endif #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; #endif diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index b99a1670dad4..4f2147c5adb6 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -340,6 +340,8 @@ cifs_create_get_file_info: rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb, xid, &fid->netfid); if (newinode) { + if (server->ops->set_lease_key) + server->ops->set_lease_key(newinode, fid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) newinode->i_mode = mode; if ((*oplock & CIFS_CREATE_ACTION) && @@ -418,6 +420,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, tcon = tlink_tcon(tlink); server = tcon->ses->server; + if (server->ops->new_lease_key) + server->ops->new_lease_key(&fid); + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, &oplock, &fid, opened); @@ -473,10 +478,14 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, if (IS_ERR(tlink)) goto out_free_xid; - rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fid, &created); tcon = tlink_tcon(tlink); server = tcon->ses->server; + + if (server->ops->new_lease_key) + server->ops->new_lease_key(&fid); + + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, + &oplock, &fid, &created); if (!rc && server->ops->close) server->ops->close(xid, tcon, &fid); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2e2e4f9aeb63..ccad858d2d67 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -177,8 +177,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, int disposition; int create_options = CREATE_NOT_DIR; FILE_ALL_INFO *buf; + struct TCP_Server_Info *server = tcon->ses->server; - if (!tcon->ses->server->ops->open) + if (!server->ops->open) return -ENOSYS; desired_access = cifs_convert_flags(f_flags); @@ -218,9 +219,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition, - desired_access, create_options, fid, - oplock, buf, cifs_sb); + rc = server->ops->open(xid, tcon, full_path, disposition, + desired_access, create_options, fid, oplock, buf, + cifs_sb); if (rc) goto out; @@ -372,6 +373,7 @@ int cifs_open(struct inode *inode, struct file *file) unsigned int xid; __u32 oplock; struct cifs_sb_info *cifs_sb; + struct TCP_Server_Info *server; struct cifs_tcon *tcon; struct tcon_link *tlink; struct cifsFileInfo *cfile = NULL; @@ -388,6 +390,7 @@ int cifs_open(struct inode *inode, struct file *file) return PTR_ERR(tlink); } tcon = tlink_tcon(tlink); + server = tcon->ses->server; full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { @@ -432,6 +435,9 @@ int cifs_open(struct inode *inode, struct file *file) } if (!posix_open_ok) { + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, xid); if (rc) @@ -440,8 +446,8 @@ int cifs_open(struct inode *inode, struct file *file) cfile = cifs_new_fileinfo(&fid, file, tlink, oplock); if (cfile == NULL) { - if (tcon->ses->server->ops->close) - tcon->ses->server->ops->close(xid, tcon, &fid); + if (server->ops->close) + server->ops->close(xid, tcon, &fid); rc = -ENOMEM; goto out; } @@ -567,6 +573,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + /* * Can not refresh inode by passing in file_info buf to be returned by * CIFSSMBOpen and then calling get_inode_info with returned buf since diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 0ddd617ffa1a..78fb2050e0d6 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -63,6 +63,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, int rc; __le16 *smb2_path; struct smb2_file_all_info *smb2_data = NULL; + __u8 smb2_oplock[17]; smb2_path = cifs_convert_path_to_utf16(path, cifs_sb); if (smb2_path == NULL) { @@ -78,11 +79,14 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, } desired_access |= FILE_READ_ATTRIBUTES; - *oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + *smb2_oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + + if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) + memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid, &fid->volatile_fid, desired_access, disposition, - 0, 0, (__u8 *)oplock, smb2_data); + 0, 0, smb2_oplock, smb2_data); if (rc) goto out; @@ -99,6 +103,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, move_smb2_info_to_cifs(buf, smb2_data); } + *oplock = *smb2_oplock; out: kfree(smb2_data); kfree(smb2_path); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0808b238219b..360d9079af49 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -555,6 +555,24 @@ smb2_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, current->tgid, length, offset, type, wait); } +static void +smb2_get_lease_key(struct inode *inode, struct cifs_fid *fid) +{ + memcpy(fid->lease_key, CIFS_I(inode)->lease_key, SMB2_LEASE_KEY_SIZE); +} + +static void +smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid) +{ + memcpy(CIFS_I(inode)->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE); +} + +static void +smb2_new_lease_key(struct cifs_fid *fid) +{ + get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE); +} + struct smb_version_operations smb21_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -616,6 +634,9 @@ struct smb_version_operations smb21_operations = { .mand_lock = smb2_mand_lock, .mand_unlock_range = smb2_unlock_range, .push_mand_locks = smb2_push_mandatory_locks, + .get_lease_key = smb2_get_lease_key, + .set_lease_key = smb2_set_lease_key, + .new_lease_key = smb2_new_lease_key, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d3e1cfca3379..89d2824587b2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -304,7 +304,7 @@ free_rsp_buf(int resp_buftype, void *rsp) cifs_buf_release(rsp); } -#define SMB2_NUM_PROT 1 +#define SMB2_NUM_PROT 2 #define SMB2_PROT 0 #define SMB21_PROT 1 @@ -393,6 +393,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); + memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); + iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ iov[0].iov_len = get_rfc1002_length(req) + 4; @@ -868,6 +870,83 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) return rc; } +static struct create_lease * +create_lease_buf(u8 *lease_key, u8 oplock) +{ + struct create_lease *buf; + + buf = kmalloc(sizeof(struct create_lease), GFP_KERNEL); + if (!buf) + return NULL; + + memset(buf, 0, sizeof(struct create_lease)); + + buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); + buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) + buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING | + SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_II) + buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_BATCH) + buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING | + SMB2_LEASE_READ_CACHING | + SMB2_LEASE_WRITE_CACHING; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_lease, lcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_lease, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Name[0] = 'R'; + buf->Name[1] = 'q'; + buf->Name[2] = 'L'; + buf->Name[3] = 's'; + return buf; +} + +static __u8 +parse_lease_state(struct smb2_create_rsp *rsp) +{ + char *data_offset; + struct create_lease *lc; + __u8 oplock = 0; + bool found = false; + + data_offset = (char *)rsp; + data_offset += 4 + le32_to_cpu(rsp->CreateContextsOffset); + lc = (struct create_lease *)data_offset; + do { + char *name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; + if (le16_to_cpu(lc->ccontext.NameLength) != 4 || + strncmp(name, "RqLs", 4)) { + lc = (struct create_lease *)((char *)lc + + le32_to_cpu(lc->ccontext.Next)); + continue; + } + if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) + return SMB2_OPLOCK_LEVEL_NOCHANGE; + found = true; + break; + } while (le32_to_cpu(lc->ccontext.Next) != 0); + + if (!found) + return oplock; + + if (le32_to_cpu(lc->lcontext.LeaseState) & SMB2_LEASE_WRITE_CACHING) { + if (le32_to_cpu(lc->lcontext.LeaseState) & + SMB2_LEASE_HANDLE_CACHING) + oplock = SMB2_OPLOCK_LEVEL_BATCH; + else + oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + } else if (le32_to_cpu(lc->lcontext.LeaseState) & + SMB2_LEASE_READ_CACHING) + oplock = SMB2_OPLOCK_LEVEL_II; + + return oplock; +} + int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, @@ -878,9 +957,11 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, struct smb2_create_rsp *rsp; struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; - struct kvec iov[2]; + struct kvec iov[3]; int resp_buftype; int uni_path_len; + __le16 *copy_path = NULL; + int copy_size; int rc = 0; int num_iovecs = 2; @@ -895,10 +976,6 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, if (rc) return rc; - if (server->oplocks) - req->RequestedOplockLevel = *oplock; - else - req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; req->ImpersonationLevel = IL_IMPERSONATION; req->DesiredAccess = cpu_to_le32(desired_access); /* File attributes ignored on open (used in create though) */ @@ -908,7 +985,7 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, req->CreateOptions = cpu_to_le32(create_options); uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - - 1 /* pad */ - 4 /* do not count rfc1001 len field */); + - 8 /* pad */ - 4 /* do not count rfc1001 len field */); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ @@ -919,6 +996,20 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, req->NameLength = cpu_to_le16(uni_path_len - 2); /* -1 since last byte is buf[0] which is sent below (path) */ iov[0].iov_len--; + if (uni_path_len % 8 != 0) { + copy_size = uni_path_len / 8 * 8; + if (copy_size < uni_path_len) + copy_size += 8; + + copy_path = kzalloc(copy_size, GFP_KERNEL); + if (!copy_path) + return -ENOMEM; + memcpy((char *)copy_path, (const char *)path, + uni_path_len); + uni_path_len = copy_size; + path = copy_path; + } + iov[1].iov_len = uni_path_len; iov[1].iov_base = path; /* @@ -927,10 +1018,37 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, */ inc_rfc1001_len(req, uni_path_len - 1); } else { + iov[0].iov_len += 7; + req->hdr.smb2_buf_length = cpu_to_be32(be32_to_cpu( + req->hdr.smb2_buf_length) + 8 - 1); num_iovecs = 1; req->NameLength = 0; } + if (!server->oplocks) + *oplock = SMB2_OPLOCK_LEVEL_NONE; + + if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) || + *oplock == SMB2_OPLOCK_LEVEL_NONE) + req->RequestedOplockLevel = *oplock; + else { + iov[num_iovecs].iov_base = create_lease_buf(oplock+1, *oplock); + if (iov[num_iovecs].iov_base == NULL) { + cifs_small_buf_release(req); + kfree(copy_path); + return -ENOMEM; + } + iov[num_iovecs].iov_len = sizeof(struct create_lease); + req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; + req->CreateContextsOffset = cpu_to_le32( + sizeof(struct smb2_create_req) - 4 - 8 + + iov[num_iovecs-1].iov_len); + req->CreateContextsLength = cpu_to_le32( + sizeof(struct create_lease)); + inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); + num_iovecs++; + } + rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); rsp = (struct smb2_create_rsp *)iov[0].iov_base; @@ -955,8 +1073,12 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, buf->DeletePending = 0; } - *oplock = rsp->OplockLevel; + if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) + *oplock = parse_lease_state(rsp); + else + *oplock = rsp->OplockLevel; creat_exit: + kfree(copy_path); free_rsp_buf(resp_buftype, rsp); return rc; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 889ee5e193d9..e818a5cc5bd8 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -150,6 +150,10 @@ struct smb2_err_rsp { __u8 ErrorData[1]; /* variable length */ } __packed; +#define SMB2_CLIENT_GUID_SIZE 16 + +extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; + struct smb2_negotiate_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 36 */ @@ -157,7 +161,7 @@ struct smb2_negotiate_req { __le16 SecurityMode; __le16 Reserved; /* MBZ */ __le32 Capabilities; - __u8 ClientGUID[16]; /* MBZ */ + __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE]; __le64 ClientStartTime; /* MBZ */ __le16 Dialects[2]; /* variable length */ } __packed; @@ -307,6 +311,8 @@ struct smb2_tree_disconnect_rsp { #define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 #define SMB2_OPLOCK_LEVEL_BATCH 0x09 #define SMB2_OPLOCK_LEVEL_LEASE 0xFF +/* Non-spec internal type */ +#define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99 /* Desired Access Flags */ #define FILE_READ_DATA_LE cpu_to_le32(0x00000001) @@ -404,7 +410,7 @@ struct smb2_create_req { __le16 NameLength; __le32 CreateContextsOffset; __le32 CreateContextsLength; - __u8 Buffer[1]; + __u8 Buffer[8]; } __packed; struct smb2_create_rsp { @@ -428,6 +434,39 @@ struct smb2_create_rsp { __u8 Buffer[1]; } __packed; +struct create_context { + __le32 Next; + __le16 NameOffset; + __le16 NameLength; + __le16 Reserved; + __le16 DataOffset; + __le32 DataLength; + __u8 Buffer[0]; +} __packed; + +#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00) +#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01) +#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02) +#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04) + +#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02) + +#define SMB2_LEASE_KEY_SIZE 16 + +struct lease_context { + __le64 LeaseKeyLow; + __le64 LeaseKeyHigh; + __le32 LeaseState; + __le32 LeaseFlags; + __le64 LeaseDuration; +} __packed; + +struct create_lease { + struct create_context ccontext; + __u8 Name[8]; + struct lease_context lcontext; +} __packed; + /* Currently defined values for close flags */ #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) struct smb2_close_req { -- cgit v1.2.3 From 25078105fbe14e7b3270391eaa11514bec787a52 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:45 -0700 Subject: CIFS: Fix cache coherency for read oplock case When we have a file opened with read oplock and we are writing a data to this file, we need to store the data in the cache and then send to the server to ensure that the next read operation will get a coherent data. Also mark it as CONFIG_CIFS_SMB2 because it's more suitable for SMB2 code but can fix some CIFS problems too (when server delays sending an oplock break after a write request). We can drop this ifdefs dependence in future. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ccad858d2d67..e93e3d2c69e6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2461,11 +2461,30 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, iocb->ki_filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); +#ifdef CONFIG_CIFS_SMB2 /* - * In strict cache mode we need to write the data to the server exactly - * from the pos to pos+len-1 rather than flush all affected pages - * because it may cause a error with mandatory locks on these pages but - * not on the region from pos to ppos+len-1. + * If we have an oplock for read and want to write a data to the file + * we need to store it in the page cache and then push it to the server + * to be sure the next read will get a valid data. + */ + if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) { + ssize_t written; + int rc; + + written = generic_file_aio_write(iocb, iov, nr_segs, pos); + rc = filemap_fdatawrite(inode->i_mapping); + if (rc) + return (ssize_t)rc; + + return written; + } +#endif + + /* + * For non-oplocked files in strict cache mode we need to write the data + * to the server exactly from the pos to pos+len-1 rather than flush all + * affected pages because it may cause a error with mandatory locks on + * these pages but not on the region from pos to ppos+len-1. */ if (!cinode->clientCanCacheAll) -- cgit v1.2.3 From 0822f51426b51bd599b3a7e972b14aacaa045a92 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:45 -0700 Subject: CIFS: Add SMB2.1 lease break support Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2file.c | 2 ++ fs/cifs/smb2misc.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/cifs/smb2ops.c | 4 +++ fs/cifs/smb2pdu.c | 46 +++++++++++++++++++-------- fs/cifs/smb2pdu.h | 25 +++++++++++++++ fs/cifs/smb2proto.h | 4 +++ 6 files changed, 157 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 78fb2050e0d6..a93eec30a50d 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -38,6 +38,8 @@ void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) { oplock &= 0xFF; + if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) + return; if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { cinode->clientCanCacheAll = true; cinode->clientCanCacheRead = true; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 01479a3fee8d..3a7f8bd5127d 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -148,6 +148,13 @@ smb2_check_message(char *buf, unsigned int length) cERROR(1, "Illegal response size %u for command %d", le16_to_cpu(pdu->StructureSize2), command); return 1; + } else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0) + && (le16_to_cpu(pdu->StructureSize2) != 44) + && (le16_to_cpu(pdu->StructureSize2) != 36)) { + /* special case for SMB2.1 lease break message */ + cERROR(1, "Illegal response size %d for oplock break", + le16_to_cpu(pdu->StructureSize2)); + return 1; } } @@ -360,6 +367,84 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) return to; } +__le32 +smb2_get_lease_state(struct cifsInodeInfo *cinode) +{ + if (cinode->clientCanCacheAll) + return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; + else if (cinode->clientCanCacheRead) + return SMB2_LEASE_READ_CACHING; + return 0; +} + +__u8 smb2_map_lease_to_oplock(__le32 lease_state) +{ + if (lease_state & SMB2_LEASE_WRITE_CACHING) { + if (lease_state & SMB2_LEASE_HANDLE_CACHING) + return SMB2_OPLOCK_LEVEL_BATCH; + else + return SMB2_OPLOCK_LEVEL_EXCLUSIVE; + } else if (lease_state & SMB2_LEASE_READ_CACHING) + return SMB2_OPLOCK_LEVEL_II; + return 0; +} + +static bool +smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) +{ + struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; + struct list_head *tmp, *tmp1, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct cifsInodeInfo *cinode; + struct cifsFileInfo *cfile; + + cFYI(1, "Checking for lease break"); + + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + + cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); + spin_lock(&cifs_file_list_lock); + list_for_each(tmp2, &tcon->openFileList) { + cfile = list_entry(tmp2, struct cifsFileInfo, + tlist); + cinode = CIFS_I(cfile->dentry->d_inode); + + if (memcmp(cinode->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + cFYI(1, "lease key match, lease break 0x%d", + le32_to_cpu(rsp->NewLeaseState)); + + smb2_set_oplock_level(cinode, + smb2_map_lease_to_oplock(rsp->NewLeaseState)); + + if (rsp->Flags & + SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED) + cfile->oplock_break_cancelled = false; + else + cfile->oplock_break_cancelled = true; + + queue_work(cifsiod_wq, &cfile->oplock_break); + + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + spin_unlock(&cifs_file_list_lock); + } + } + spin_unlock(&cifs_tcp_ses_lock); + cFYI(1, "Can not process lease break - no lease matched"); + return false; +} + bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) { @@ -377,7 +462,10 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (le16_to_cpu(rsp->StructureSize) != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { - return false; + if (le16_to_cpu(rsp->StructureSize) == 44) + return smb2_is_valid_lease_break(buffer, server); + else + return false; } cFYI(1, "oplock level 0x%d", rsp->OplockLevel); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 360d9079af49..630156f98cc7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -513,6 +513,10 @@ static int smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, struct cifsInodeInfo *cinode) { + if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) + return SMB2_lease_break(0, tcon, cinode->lease_key, + smb2_get_lease_state(cinode)); + return SMB2_oplock_break(0, tcon, fid->persistent_fid, fid->volatile_fid, cinode->clientCanCacheRead ? 1 : 0); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 89d2824587b2..1572abefb378 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -911,7 +911,6 @@ parse_lease_state(struct smb2_create_rsp *rsp) { char *data_offset; struct create_lease *lc; - __u8 oplock = 0; bool found = false; data_offset = (char *)rsp; @@ -932,19 +931,9 @@ parse_lease_state(struct smb2_create_rsp *rsp) } while (le32_to_cpu(lc->ccontext.Next) != 0); if (!found) - return oplock; - - if (le32_to_cpu(lc->lcontext.LeaseState) & SMB2_LEASE_WRITE_CACHING) { - if (le32_to_cpu(lc->lcontext.LeaseState) & - SMB2_LEASE_HANDLE_CACHING) - oplock = SMB2_OPLOCK_LEVEL_BATCH; - else - oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; - } else if (le32_to_cpu(lc->lcontext.LeaseState) & - SMB2_LEASE_READ_CACHING) - oplock = SMB2_OPLOCK_LEVEL_II; + return 0; - return oplock; + return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); } int @@ -2228,3 +2217,34 @@ SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, return smb2_lockv(xid, tcon, persist_fid, volatile_fid, pid, 1, &lock); } + +int +SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, + __u8 *lease_key, const __le32 lease_state) +{ + int rc; + struct smb2_lease_ack *req = NULL; + + cFYI(1, "SMB2_lease_break"); + rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); + + if (rc) + return rc; + + req->hdr.CreditRequest = cpu_to_le16(1); + req->StructureSize = cpu_to_le16(36); + inc_rfc1001_len(req, 12); + + memcpy(req->LeaseKey, lease_key, 16); + req->LeaseState = lease_state; + + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP); + /* SMB2 buffer freed by function above */ + + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); + cFYI(1, "Send error in Lease Break = %d", rc); + } + + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e818a5cc5bd8..da099225b1a9 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -693,6 +693,31 @@ struct smb2_oplock_break { __u64 VolatileFid; } __packed; +#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01) + +struct smb2_lease_break { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 44 */ + __le16 Reserved; + __le32 Flags; + __u8 LeaseKey[16]; + __le32 CurrentLeaseState; + __le32 NewLeaseState; + __le32 BreakReason; + __le32 AccessMaskHint; + __le32 ShareMaskHint; +} __packed; + +struct smb2_lease_ack { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 36 */ + __le16 Reserved; + __le32 Flags; + __u8 LeaseKey[16]; + __le32 LeaseState; + __le64 LeaseDuration; +} __packed; + /* * PDU infolevel structure definitions * BB consider moving to a different header diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 8b4d3712255b..7d25f8b14f93 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -48,6 +48,8 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); extern void smb2_echo_request(struct work_struct *work); +extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); +extern __u8 smb2_map_lease_to_oplock(__le32 lease_state); extern bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv); @@ -151,5 +153,7 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, const __u32 num_lock, struct smb2_lock_element *buf); +extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, + __u8 *lease_key, const __le32 lease_state); #endif /* _SMB2PROTO_H */ -- cgit v1.2.3 From 233839b1df65a24c8b67b748fe7b18d86d0ad6d7 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 06:22:45 -0700 Subject: CIFS: Fix fast lease break after open problem Now we walk though cifsFileInfo's list for every incoming lease break and look for an equivalent there. That approach misses lease breaks that come just after an open response - we don't have time to populate new cifsFileInfo structure to the list. Fix this by adding new list of pending opens and look for a lease there if we didn't find it in the list of cifsFileInfo structures. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 12 +++++++++ fs/cifs/cifsproto.h | 7 +++++ fs/cifs/connect.c | 1 + fs/cifs/dir.c | 9 ++++++- fs/cifs/file.c | 35 ++++++++++++++++++++++--- fs/cifs/misc.c | 30 ++++++++++++++++++++++ fs/cifs/smb2misc.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++---- 7 files changed, 158 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b6ec142028e8..a39e5b7fc844 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -715,6 +715,7 @@ struct cifs_ses { __u16 session_flags; #endif /* CONFIG_CIFS_SMB2 */ }; + /* no more than one of the following three session flags may be set */ #define CIFS_SES_NT4 1 #define CIFS_SES_OS2 2 @@ -821,6 +822,7 @@ struct cifs_tcon { u64 resource_id; /* server resource id */ struct fscache_cookie *fscache; /* cookie for share */ #endif + struct list_head pending_opens; /* list of incomplete opens */ /* BB add field for back pointer to sb struct(s)? */ }; @@ -863,6 +865,15 @@ cifs_get_tlink(struct tcon_link *tlink) /* This function is always expected to succeed */ extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); +#define CIFS_OPLOCK_NO_CHANGE 0xfe + +struct cifs_pending_open { + struct list_head olist; + struct tcon_link *tlink; + __u8 lease_key[16]; + __u32 oplock; +}; + /* * This info hangs off the cifsFileInfo structure, pointed to by llist. * This is used to track byte stream locks on the file @@ -903,6 +914,7 @@ struct cifs_fid { __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ #endif + struct cifs_pending_open *pending_open; }; struct cifs_fid_locks { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c758ee7b0307..09ea6321c55a 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -184,6 +184,13 @@ extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, __u8 type, struct cifsLockInfo **conf_lock, bool rw_check); +extern void cifs_add_pending_open(struct cifs_fid *fid, + struct tcon_link *tlink, + struct cifs_pending_open *open); +extern void cifs_add_pending_open_locked(struct cifs_fid *fid, + struct tcon_link *tlink, + struct cifs_pending_open *open); +extern void cifs_del_pending_open(struct cifs_pending_open *open); #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 443e39633107..59c595e8a1b0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2645,6 +2645,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon->retry = volume_info->retry; tcon->nocase = volume_info->nocase; tcon->local_lease = volume_info->local_lease; + INIT_LIST_HEAD(&tcon->pending_opens); spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 4f2147c5adb6..7c0a81283645 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -382,6 +382,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct cifs_fid fid; + struct cifs_pending_open open; __u32 oplock; struct cifsFileInfo *file_info; @@ -423,16 +424,21 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->new_lease_key) server->ops->new_lease_key(&fid); + cifs_add_pending_open(&fid, tlink, &open); + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, &oplock, &fid, opened); - if (rc) + if (rc) { + cifs_del_pending_open(&open); goto out; + } rc = finish_open(file, direntry, generic_file_open, opened); if (rc) { if (server->ops->close) server->ops->close(xid, tcon, &fid); + cifs_del_pending_open(&open); goto out; } @@ -440,6 +446,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (file_info == NULL) { if (server->ops->close) server->ops->close(xid, tcon, &fid); + cifs_del_pending_open(&open); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e93e3d2c69e6..88e9c74e2cac 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -247,6 +247,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifsFileInfo *cfile; struct cifs_fid_locks *fdlocks; + struct cifs_tcon *tcon = tlink_tcon(tlink); cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); if (cfile == NULL) @@ -274,10 +275,15 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); mutex_init(&cfile->fh_mutex); - tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); spin_lock(&cifs_file_list_lock); - list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList)); + if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE) + oplock = fid->pending_open->oplock; + list_del(&fid->pending_open->olist); + + tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); + + list_add(&cfile->tlist, &tcon->openFileList); /* if readable file instance put first in list*/ if (file->f_mode & FMODE_READ) list_add(&cfile->flist, &cinode->openFileList); @@ -307,9 +313,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { struct inode *inode = cifs_file->dentry->d_inode; struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + struct TCP_Server_Info *server = tcon->ses->server; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsLockInfo *li, *tmp; + struct cifs_fid fid; + struct cifs_pending_open open; spin_lock(&cifs_file_list_lock); if (--cifs_file->count > 0) { @@ -317,6 +326,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) return; } + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + + /* store open in pending opens to make sure we don't miss lease break */ + cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); + /* remove it from the lists */ list_del(&cifs_file->flist); list_del(&cifs_file->tlist); @@ -348,6 +363,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) free_xid(xid); } + cifs_del_pending_open(&open); + /* * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. @@ -368,6 +385,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) } int cifs_open(struct inode *inode, struct file *file) + { int rc = -EACCES; unsigned int xid; @@ -380,6 +398,7 @@ int cifs_open(struct inode *inode, struct file *file) char *full_path = NULL; bool posix_open_ok = false; struct cifs_fid fid; + struct cifs_pending_open open; xid = get_xid(); @@ -401,7 +420,7 @@ int cifs_open(struct inode *inode, struct file *file) cFYI(1, "inode = 0x%p file flags are 0x%x for %s", inode, file->f_flags, full_path); - if (tcon->ses->server->oplocks) + if (server->oplocks) oplock = REQ_OPLOCK; else oplock = 0; @@ -434,20 +453,28 @@ int cifs_open(struct inode *inode, struct file *file) */ } + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + + cifs_add_pending_open(&fid, tlink, &open); + if (!posix_open_ok) { if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &fid); rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, xid); - if (rc) + if (rc) { + cifs_del_pending_open(&open); goto out; + } } cfile = cifs_new_fileinfo(&fid, file, tlink, oplock); if (cfile == NULL) { if (server->ops->close) server->ops->close(xid, tcon, &fid); + cifs_del_pending_open(&open); rc = -ENOMEM; goto out; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index a921b0712eff..3a00c0d0cead 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -579,3 +579,33 @@ backup_cred(struct cifs_sb_info *cifs_sb) return false; } + +void +cifs_del_pending_open(struct cifs_pending_open *open) +{ + spin_lock(&cifs_file_list_lock); + list_del(&open->olist); + spin_unlock(&cifs_file_list_lock); +} + +void +cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink, + struct cifs_pending_open *open) +{ +#ifdef CONFIG_CIFS_SMB2 + memcpy(open->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE); +#endif + open->oplock = CIFS_OPLOCK_NO_CHANGE; + open->tlink = tlink; + fid->pending_open = open; + list_add_tail(&open->olist, &tlink_tcon(tlink)->pending_opens); +} + +void +cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink, + struct cifs_pending_open *open) +{ + spin_lock(&cifs_file_list_lock); + cifs_add_pending_open_locked(fid, tlink, open); + spin_unlock(&cifs_file_list_lock); +} diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 3a7f8bd5127d..cd31715f03f4 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -389,6 +389,27 @@ __u8 smb2_map_lease_to_oplock(__le32 lease_state) return 0; } +struct smb2_lease_break_work { + struct work_struct lease_break; + struct tcon_link *tlink; + __u8 lease_key[16]; + __le32 lease_state; +}; + +static void +cifs_ses_oplock_break(struct work_struct *work) +{ + struct smb2_lease_break_work *lw = container_of(work, + struct smb2_lease_break_work, lease_break); + int rc; + + rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key, + lw->lease_state); + cFYI(1, "Lease release rc %d", rc); + cifs_put_tlink(lw->tlink); + kfree(lw); +} + static bool smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) { @@ -398,6 +419,19 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) struct cifs_tcon *tcon; struct cifsInodeInfo *cinode; struct cifsFileInfo *cfile; + struct cifs_pending_open *open; + struct smb2_lease_break_work *lw; + bool found; + int ack_req = rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED; + + lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); + if (!lw) { + cERROR(1, "Memory allocation failed during lease break check"); + return false; + } + + INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); + lw->lease_state = rsp->NewLeaseState; cFYI(1, "Checking for lease break"); @@ -405,28 +439,29 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp, &server->smb_ses_list) { ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + + spin_lock(&cifs_file_list_lock); list_for_each(tmp1, &ses->tcon_list) { tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); - spin_lock(&cifs_file_list_lock); list_for_each(tmp2, &tcon->openFileList) { cfile = list_entry(tmp2, struct cifsFileInfo, - tlist); + tlist); cinode = CIFS_I(cfile->dentry->d_inode); if (memcmp(cinode->lease_key, rsp->LeaseKey, SMB2_LEASE_KEY_SIZE)) continue; + cFYI(1, "found in the open list"); cFYI(1, "lease key match, lease break 0x%d", le32_to_cpu(rsp->NewLeaseState)); smb2_set_oplock_level(cinode, smb2_map_lease_to_oplock(rsp->NewLeaseState)); - if (rsp->Flags & - SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED) + if (ack_req) cfile->oplock_break_cancelled = false; else cfile->oplock_break_cancelled = true; @@ -437,10 +472,39 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) spin_unlock(&cifs_tcp_ses_lock); return true; } - spin_unlock(&cifs_file_list_lock); + + found = false; + list_for_each_entry(open, &tcon->pending_opens, olist) { + if (memcmp(open->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + if (!found && ack_req) { + found = true; + memcpy(lw->lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + lw->tlink = cifs_get_tlink(open->tlink); + queue_work(cifsiod_wq, + &lw->lease_break); + } + + cFYI(1, "found in the pending open list"); + cFYI(1, "lease key match, lease break 0x%d", + le32_to_cpu(rsp->NewLeaseState)); + + open->oplock = + smb2_map_lease_to_oplock(rsp->NewLeaseState); + } + if (found) { + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } } + spin_unlock(&cifs_file_list_lock); } spin_unlock(&cifs_tcp_ses_lock); + kfree(lw); cFYI(1, "Can not process lease break - no lease matched"); return false; } -- cgit v1.2.3 From 101b92d9590a645d6fb643654b3a92556203b745 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 06:22:45 -0700 Subject: cifs: cleanups for cifs_mkdir_qinfo Rename inode pointers for better clarity. Move the d_instantiate call to the end of the function to prevent other tasks from seeing it before we've finished constructing it. Since we should have exclusive access to the inode at this point, remove the spinlock around i_nlink update. Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 51 ++++++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e74e1bceb416..3d155875f446 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1177,34 +1177,33 @@ unlink_out: } static int -cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, +cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, const char *full_path, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, const unsigned int xid) { int rc = 0; - struct inode *newinode = NULL; + struct inode *inode = NULL; if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, + rc = cifs_get_inode_info_unix(&inode, full_path, parent->i_sb, xid); else - rc = cifs_get_inode_info(&newinode, full_path, NULL, - inode->i_sb, xid, NULL); + rc = cifs_get_inode_info(&inode, full_path, NULL, parent->i_sb, + xid, NULL); + if (rc) return rc; - d_instantiate(dentry, newinode); /* * setting nlink not necessary except in cases where we failed to get it - * from the server or was set bogus + * from the server or was set bogus. Also, since this is a brand new + * inode, no need to grab the i_lock before setting the i_nlink. */ - spin_lock(&dentry->d_inode->i_lock); - if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) - set_nlink(dentry->d_inode, 2); - spin_unlock(&dentry->d_inode->i_lock); + if (inode->i_nlink < 2) + set_nlink(inode, 2); mode &= ~current_umask(); /* must turn on setgid bit if parent dir has it */ - if (inode->i_mode & S_ISGID) + if (parent->i_mode & S_ISGID) mode |= S_ISGID; if (tcon->unix_ext) { @@ -1217,8 +1216,8 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, }; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { args.uid = (__u64)current_fsuid(); - if (inode->i_mode & S_ISGID) - args.gid = (__u64)inode->i_gid; + if (parent->i_mode & S_ISGID) + args.gid = (__u64)parent->i_gid; else args.gid = (__u64)current_fsgid(); } else { @@ -1233,22 +1232,20 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, struct TCP_Server_Info *server = tcon->ses->server; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) - server->ops->mkdir_setinfo(newinode, full_path, cifs_sb, + server->ops->mkdir_setinfo(inode, full_path, cifs_sb, tcon, xid); - if (dentry->d_inode) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) - dentry->d_inode->i_mode = (mode | S_IFDIR); - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - dentry->d_inode->i_uid = current_fsuid(); - if (inode->i_mode & S_ISGID) - dentry->d_inode->i_gid = inode->i_gid; - else - dentry->d_inode->i_gid = - current_fsgid(); - } + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + inode->i_mode = (mode | S_IFDIR); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + inode->i_uid = current_fsuid(); + if (inode->i_mode & S_ISGID) + inode->i_gid = parent->i_gid; + else + inode->i_gid = current_fsgid(); } } + d_instantiate(dentry, inode); return rc; } -- cgit v1.2.3 From ecdb69e2cc80cca77d6afcc0aca244b72cc5ee68 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Wed, 19 Sep 2012 06:22:45 -0700 Subject: cifs: Mangle string used for unc in /proc/mounts The string for "unc=" in /proc/mounts needs to be escaped. The current behaviour can create problems in cases when mounting a share starting with a number. example: >mount -t cifs -o username=test,password=x vm140-31:/17000-test /mnt >mount -o remount,password=x /mnt mount error: could not resolve address for vm140-31x00-test: Unknown error The sub-string "\170" which is part of the unc for the mount above in /proc/mounts is interpreted as character'x' in the case above. Escaping the string fixes the problem. Signed-off-by: Sachin Prabhu Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 28ac048d54ea..a41044a31083 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -350,7 +350,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) cifs_show_security(s, tcon->ses->server); cifs_show_cache_flavor(s, cifs_sb); - seq_printf(s, ",unc=%s", tcon->treeName); + seq_printf(s, ",unc="); + seq_escape(s, tcon->treeName, " \t\n\\"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) seq_printf(s, ",multiuser"); -- cgit v1.2.3 From 3d6d854a13844223b603fd7a16a4a4a4afd62c72 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 06:22:46 -0700 Subject: cifs: add FL_CLOSE to fl_flags mask in cifs_read_flock FL_CLOSE is quite common when you close a file on which you hold a lock. The spurious "Unknown lock flags" message in cFYI is confusing in this case. Reported-by: Alexander Bokovoy Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 88e9c74e2cac..075f7cfd1da5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1173,7 +1173,8 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, if (flock->fl_flags & FL_LEASE) cFYI(1, "Lease on file - not implemented yet"); if (flock->fl_flags & - (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) + (~(FL_POSIX | FL_FLOCK | FL_SLEEP | + FL_ACCESS | FL_LEASE | FL_CLOSE))) cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); *type = server->vals->large_lock_type; -- cgit v1.2.3 From 5efeb0970794933f86af70a0fa8282e9255a1ca2 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 19 Sep 2012 06:22:46 -0700 Subject: Update cifs version number With SMB2 support, update from version 1.79 to 2.0 to make it easier for users to recognize which version has SMB2 support. Signed-off-by: Steve French Reviewed-by: Jeff Layton --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 1c49c5a9b27a..7163419cecd9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -128,5 +128,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "1.78" +#define CIFS_VERSION "2.0" #endif /* _CIFSFS_H */ -- cgit v1.2.3 From ba02e89915afcfc9a071a86e5cae32f77c7d353a Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 19 Sep 2012 06:22:46 -0700 Subject: MARK SMB2 support EXPERIMENTAL Now that the merge of the remaining pieces needed for SMB2 (SMB2.1 dialect) are in, and most test cases pass, we can consider SMB2.1 EXPERIMENTAL rather than "BROKEN." Reviewed-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 802930179c2b..253033ec0904 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -162,7 +162,7 @@ config CIFS_NFSD_EXPORT config CIFS_SMB2 bool "SMB2 network file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL && INET && BROKEN + depends on EXPERIMENTAL && INET select NLS select KEYS select FSCACHE -- cgit v1.2.3 From 12e8a20824677fbc24e921d7aebfda6a47cc25b1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 19 Sep 2012 09:19:39 -0700 Subject: Trivial endian fixes Some trivial endian fixes for the SMB2 code. One warning remains which I asked Pavel to look at. Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 5 +++-- fs/cifs/smb2ops.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index cd31715f03f4..7b1c5e3287fb 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -422,7 +422,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) struct cifs_pending_open *open; struct smb2_lease_break_work *lw; bool found; - int ack_req = rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED; + int ack_req = le32_to_cpu(rsp->Flags & + SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); if (!lw) { @@ -524,7 +525,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->hdr.Command != SMB2_OPLOCK_BREAK) return false; - if (le16_to_cpu(rsp->StructureSize) != + if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) return smb2_is_valid_lease_break(buffer, server); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 630156f98cc7..2183bb343edd 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -496,7 +496,7 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length) { struct smb2_hdr *hdr = (struct smb2_hdr *)buf; - if (le32_to_cpu(hdr->Status) != STATUS_PENDING) + if (hdr->Status != STATUS_PENDING) return false; if (!length) { -- cgit v1.2.3 From e4e3703555b6eb1afa0be86a45cd3c8bccb6cb08 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 12:36:52 +0400 Subject: CIFS: Fix endian conversion of IndexNumber by making it __le64 rather than __u64 in FILE_AL_INFO structure. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 3fb03e2c8e86..b9d59a948a2c 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2210,7 +2210,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ __u8 DeletePending; __u8 Directory; __u16 Pad2; - __u64 IndexNumber; + __le64 IndexNumber; __le32 EASize; __le32 AccessFlags; __u64 IndexNumber1; -- cgit v1.2.3 From e5d04887196ee30423c79e52043d418e04012954 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Sep 2012 16:03:26 +0400 Subject: CIFS: Fix possible memory leaks in SMB2 code and add missed increments of failed async read and write requests. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1572abefb378..a7db95f4760c 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1218,13 +1218,13 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, iov[0].iov_len = get_rfc1002_length(req) + 4; rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); + rsp = (struct smb2_query_info_rsp *)iov[0].iov_base; + if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto qinf_exit; } - rsp = (struct smb2_query_info_rsp *)iov[0].iov_base; - rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, min_len, data); @@ -1485,8 +1485,10 @@ smb2_async_readv(struct cifs_readdata *rdata) rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, cifs_readv_receive, smb2_readv_callback, rdata, 0); - if (rc) + if (rc) { kref_put(&rdata->refcount, cifs_readdata_release); + cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); + } cifs_small_buf_release(buf); return rc; @@ -1643,8 +1645,10 @@ smb2_async_writev(struct cifs_writedata *wdata) rc = cifs_call_async(tcon->ses->server, &rqst, NULL, smb2_writev_callback, wdata, 0); - if (rc) + if (rc) { kref_put(&wdata->refcount, cifs_writedata_release); + cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); + } async_writev_out: cifs_small_buf_release(req); @@ -1700,15 +1704,15 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1, &resp_buftype, 0); + rsp = (struct smb2_write_rsp *)iov[0].iov_base; if (rc) { cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE); cERROR(1, "Send error in write = %d", rc); - } else { - rsp = (struct smb2_write_rsp *)iov[0].iov_base; + } else *nbytes = le32_to_cpu(rsp->DataLength); - free_rsp_buf(resp_buftype, rsp); - } + + free_rsp_buf(resp_buftype, rsp); return rc; } @@ -1828,11 +1832,12 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, inc_rfc1001_len(req, len - 1 /* Buffer */); rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0); + rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; + if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } - rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, -- cgit v1.2.3 From 52b0c3427e5baf14f1ffb92cf8ae542160ec6a07 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 15:20:27 -0700 Subject: cifs: remove support for CIFS_IOC_CHECKUMOUNT ioctl ...as promised for 3.7. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/ioctl.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'fs') diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 5b3481bd3d96..fd5009d56f9f 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -28,8 +28,6 @@ #include "cifs_debug.h" #include "cifsfs.h" -#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) - long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = filep->f_dentry->d_inode; @@ -51,23 +49,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) cifs_sb = CIFS_SB(inode->i_sb); switch (command) { - static bool warned = false; - case CIFS_IOC_CHECKUMOUNT: - if (!warned) { - warned = true; - cERROR(1, "the CIFS_IOC_CHECKMOUNT ioctl will " - "be deprecated in 3.7. Please " - "migrate away from the use of " - "umount.cifs"); - } - cFYI(1, "User unmount attempted"); - if (cifs_sb->mnt_uid == current_uid()) - rc = 0; - else { - rc = -EACCES; - cFYI(1, "uids do not match"); - } - break; #ifdef CONFIG_CIFS_POSIX case FS_IOC_GETFLAGS: if (pSMBFile == NULL) -- cgit v1.2.3 From 1b359204901e182b27aa9da432095cbe2cfd2512 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Sep 2012 15:20:27 -0700 Subject: cifs: remove support for deprecated "forcedirectio" and "strictcache" mount options ...and make the default cache=strict as promised for 3.7. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 59c595e8a1b0..a792282f02f7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -82,8 +82,7 @@ enum { Opt_serverino, Opt_noserverino, Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl, Opt_acl, Opt_noacl, Opt_locallease, - Opt_sign, Opt_seal, Opt_direct, - Opt_strictcache, Opt_noac, + Opt_sign, Opt_seal, Opt_noac, Opt_fsc, Opt_mfsymlinks, Opt_multiuser, Opt_sloppy, @@ -160,10 +159,6 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_locallease, "locallease" }, { Opt_sign, "sign" }, { Opt_seal, "seal" }, - { Opt_direct, "direct" }, - { Opt_direct, "directio" }, - { Opt_direct, "forcedirectio" }, - { Opt_strictcache, "strictcache" }, { Opt_noac, "noac" }, { Opt_fsc, "fsc" }, { Opt_mfsymlinks, "mfsymlinks" }, @@ -1105,8 +1100,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, char *string = NULL; char *tmp_end, *value; char delim; - bool cache_specified = false; - static bool cache_warned = false; separator[0] = ','; separator[1] = 0; @@ -1138,6 +1131,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* default to using server inode numbers where available */ vol->server_ino = 1; + /* default is to use strict cifs caching semantics */ + vol->strict_io = true; + vol->actimeo = CIFS_DEF_ACTIMEO; /* FIXME: add autonegotiation -- for now, SMB1 is default */ @@ -1321,22 +1317,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, */ vol->seal = 1; break; - case Opt_direct: - cache_specified = true; - vol->direct_io = true; - vol->strict_io = false; - cERROR(1, "The \"directio\" option will be removed in " - "3.7. Please switch to the \"cache=none\" " - "option."); - break; - case Opt_strictcache: - cache_specified = true; - vol->direct_io = false; - vol->strict_io = true; - cERROR(1, "The \"strictcache\" option will be removed " - "in 3.7. Please switch to the \"cache=strict\" " - "option."); - break; case Opt_noac: printk(KERN_WARNING "CIFS: Mount option noac not " "supported. Instead set " @@ -1771,7 +1751,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; break; case Opt_cache: - cache_specified = true; string = match_strdup(args); if (string == NULL) goto out_nomem; @@ -1822,14 +1801,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " "specified with no gid= option.\n"); - /* FIXME: remove this block in 3.7 */ - if (!cache_specified && !cache_warned) { - cache_warned = true; - printk(KERN_NOTICE "CIFS: no cache= option specified, using " - "\"cache=loose\". This default will change " - "to \"cache=strict\" in 3.7.\n"); - } - kfree(mountdata_copy); return 0; -- cgit v1.2.3 From 142e5460a66edbfe881474eb422e86ff61c4bfc7 Mon Sep 17 00:00:00 2001 From: Jaeden Amero Date: Mon, 24 Sep 2012 10:39:45 -0500 Subject: compat_ioctl: Avoid using undefined RS-485 IOCTLs Wrap the use of TIOCSRS485 and TIOCGRS485 in #ifdef so that we avoid adding undefined IOCTLs to the ioctl pointer list as compatible ioctls. This change was motivated by a build error on a MIPS build. tree: git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git tty-next head: ac57e7f38ea6fe7358cd0b7a2f2d21aef5ab70cd commit: 84c3b84860440a9e3a3666c14112f41311b8f623 [10/16] compat_ioctl: Add RS-485 IOCTLs to the list config: mips-fuloong2e_defconfig All related error/warning messages: fs/compat_ioctl.c:869:1: error: 'TIOCSRS485' undeclared here (not in a function) fs/compat_ioctl.c:870:1: error: 'TIOCGRS485' undeclared here (not in a function) vim +869 fs/compat_ioctl.c 863 COMPATIBLE_IOCTL(TIOCSPGRP) 864 COMPATIBLE_IOCTL(TIOCGPGRP) 865 COMPATIBLE_IOCTL(TIOCGPTN) 866 COMPATIBLE_IOCTL(TIOCSPTLCK) 867 COMPATIBLE_IOCTL(TIOCSERGETLSR) 868 COMPATIBLE_IOCTL(TIOCSIG) > 869 COMPATIBLE_IOCTL(TIOCSRS485) 870 COMPATIBLE_IOCTL(TIOCGRS485) 871 #ifdef TCGETS2 872 COMPATIBLE_IOCTL(TCGETS2) Reported-by: Fengguang Wu Reported-by: Stephen Rothwell Signed-off-by: Jaeden Amero Signed-off-by: Greg Kroah-Hartman --- fs/compat_ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 85dfebfe6820..59f8db4a39a7 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -866,8 +866,12 @@ COMPATIBLE_IOCTL(TIOCGPTN) COMPATIBLE_IOCTL(TIOCSPTLCK) COMPATIBLE_IOCTL(TIOCSERGETLSR) COMPATIBLE_IOCTL(TIOCSIG) +#ifdef TIOCSRS485 COMPATIBLE_IOCTL(TIOCSRS485) +#endif +#ifdef TIOCGRS485 COMPATIBLE_IOCTL(TIOCGRS485) +#endif #ifdef TCGETS2 COMPATIBLE_IOCTL(TCGETS2) COMPATIBLE_IOCTL(TCSETS2) -- cgit v1.2.3 From 7f1468d1d50d368097ab13596dc08eaba7eace7f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 25 Sep 2012 23:19:25 -0400 Subject: ext4: fix double unlock buffer mess during fs-resize bh_submit_read() is responsible for unlock bh on endio. In addition, we need to use bh_uptodate_or_lock() to avoid races. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9f821ce39800..f21fdbf5c75d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1181,17 +1181,12 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) struct buffer_head *bh = sb_getblk(sb, block); if (!bh) return NULL; - - if (bitmap_uptodate(bh)) - return bh; - - lock_buffer(bh); - if (bh_submit_read(bh) < 0) { - unlock_buffer(bh); - brelse(bh); - return NULL; + if (!bh_uptodate_or_lock(bh)) { + if (bh_submit_read(bh) < 0) { + brelse(bh); + return NULL; + } } - unlock_buffer(bh); return bh; } -- cgit v1.2.3 From 0acdb8876fead922c9ffa6768c5675a37485c48c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 26 Sep 2012 00:08:57 -0400 Subject: ext4: don't call update_backups() multiple times for the same bg When performing an online resize, we add a bunch of groups at one time in ext4_flex_group_add, so in most cases a lot of group descriptors will be in the same group block. But in the end of this function, update_backups will be called for every group descriptor and the same block will be copied and journalled again and again. It is really a waste. Fix things so we only update a particular bg descriptor block once and skip subsequent updates of the same block. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f21fdbf5c75d..7a75e1086961 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1460,6 +1460,7 @@ exit_journal: EXT4_DESC_PER_BLOCK(sb)); int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + sector_t old_gdb = 0; update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block), 0); @@ -1467,8 +1468,11 @@ exit_journal: struct buffer_head *gdb_bh; gdb_bh = sbi->s_group_desc[gdb_num]; + if (old_gdb == gdb_bh->b_blocknr) + continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, gdb_bh->b_size, meta_bg); + old_gdb = gdb_bh->b_blocknr; } } exit: -- cgit v1.2.3 From 03bd8b9b896c8e940f282f540e6b4de90d666b7c Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 26 Sep 2012 12:32:19 -0400 Subject: ext4: move_extent code cleanup - Remove usless checks, because it is too late to check that inode != NULL at the moment it was referenced several times. - Double lock routines looks very ugly and locking ordering relays on order of i_ino, but other kernel code rely on order of pointers. Let's make them simple and clean. - check that inodes belongs to the same SB as soon as possible. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/move_extent.c | 167 ++++++++++++++------------------------------------ 1 file changed, 47 insertions(+), 120 deletions(-) (limited to 'fs') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c5826c623e7a..df5cde5130c5 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -140,56 +140,22 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, return 1; } -/** - * mext_check_null_inode - NULL check for two inodes - * - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. - */ -static int -mext_check_null_inode(struct inode *inode1, struct inode *inode2, - const char *function, unsigned int line) -{ - int ret = 0; - - if (inode1 == NULL) { - __ext4_error(inode2->i_sb, function, line, - "Both inodes should not be NULL: " - "inode1 NULL inode2 %lu", inode2->i_ino); - ret = -EIO; - } else if (inode2 == NULL) { - __ext4_error(inode1->i_sb, function, line, - "Both inodes should not be NULL: " - "inode1 %lu inode2 NULL", inode1->i_ino); - ret = -EIO; - } - return ret; -} - /** * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem * - * @orig_inode: original inode structure - * @donor_inode: donor inode structure - * Acquire write lock of i_data_sem of the two inodes (orig and donor) by - * i_ino order. + * Acquire write lock of i_data_sem of the two inodes */ static void -double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) +double_down_write_data_sem(struct inode *first, struct inode *second) { - struct inode *first = orig_inode, *second = donor_inode; + if (first < second) { + down_write(&EXT4_I(first)->i_data_sem); + down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); + } else { + down_write(&EXT4_I(second)->i_data_sem); + down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); - /* - * Use the inode number to provide the stable locking order instead - * of its address, because the C language doesn't guarantee you can - * compare pointers that don't come from the same array. - */ - if (donor_inode->i_ino < orig_inode->i_ino) { - first = donor_inode; - second = orig_inode; } - - down_write(&EXT4_I(first)->i_data_sem); - down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); } /** @@ -969,14 +935,6 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } - /* Files should be in the same ext4 FS */ - if (orig_inode->i_sb != donor_inode->i_sb) { - ext4_debug("ext4 move extent: The argument files " - "should be in same FS [ino:orig %lu, donor %lu]\n", - orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; - } - /* Ext4 move extent supports only extent based file */ if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " @@ -1072,35 +1030,19 @@ mext_check_arguments(struct inode *orig_inode, * @inode1: the inode structure * @inode2: the inode structure * - * Lock two inodes' i_mutex by i_ino order. - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. + * Lock two inodes' i_mutex */ -static int +static void mext_inode_double_lock(struct inode *inode1, struct inode *inode2) { - int ret = 0; - - BUG_ON(inode1 == NULL && inode2 == NULL); - - ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); - if (ret < 0) - goto out; - - if (inode1 == inode2) { - mutex_lock(&inode1->i_mutex); - goto out; - } - - if (inode1->i_ino < inode2->i_ino) { + BUG_ON(inode1 == inode2); + if (inode1 < inode2) { mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); } else { mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); } - -out: - return ret; } /** @@ -1109,28 +1051,13 @@ out: * @inode1: the inode that is released first * @inode2: the inode that is released second * - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. */ -static int +static void mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) { - int ret = 0; - - BUG_ON(inode1 == NULL && inode2 == NULL); - - ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); - if (ret < 0) - goto out; - - if (inode1) - mutex_unlock(&inode1->i_mutex); - - if (inode2 && inode2 != inode1) - mutex_unlock(&inode2->i_mutex); - -out: - return ret; + mutex_unlock(&inode1->i_mutex); + mutex_unlock(&inode2->i_mutex); } /** @@ -1187,16 +1114,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; ext4_lblk_t rest_blocks; pgoff_t orig_page_offset = 0, seq_end_page; - int ret1, ret2, depth, last_extent = 0; + int ret, depth, last_extent = 0; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; int data_offset_in_page; int block_len_in_page; int uninit; - /* orig and donor should be different file */ - if (orig_inode->i_ino == donor_inode->i_ino) { + if (orig_inode->i_sb != donor_inode->i_sb) { + ext4_debug("ext4 move extent: The argument files " + "should be in same FS [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* orig and donor should be different inodes */ + if (orig_inode == donor_inode) { ext4_debug("ext4 move extent: The argument files should not " - "be same file [ino:orig %lu, donor %lu]\n", + "be same inode [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } @@ -1210,16 +1144,14 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, } /* Protect orig and donor inodes against a truncate */ - ret1 = mext_inode_double_lock(orig_inode, donor_inode); - if (ret1 < 0) - return ret1; + mext_inode_double_lock(orig_inode, donor_inode); /* Protect extent tree against block allocations via delalloc */ double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ - ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, + ret = mext_check_arguments(orig_inode, donor_inode, orig_start, donor_start, &len); - if (ret1) + if (ret) goto out; file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; @@ -1227,13 +1159,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, if (file_end < block_end) len -= block_end - file_end; - ret1 = get_ext_path(orig_inode, block_start, &orig_path); - if (ret1) + ret = get_ext_path(orig_inode, block_start, &orig_path); + if (ret) goto out; /* Get path structure to check the hole */ - ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); - if (ret1) + ret = get_ext_path(orig_inode, block_start, &holecheck_path); + if (ret) goto out; depth = ext_depth(orig_inode); @@ -1252,13 +1184,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, last_extent = mext_next_extent(orig_inode, holecheck_path, &ext_cur); if (last_extent < 0) { - ret1 = last_extent; + ret = last_extent; goto out; } last_extent = mext_next_extent(orig_inode, orig_path, &ext_dummy); if (last_extent < 0) { - ret1 = last_extent; + ret = last_extent; goto out; } seq_start = le32_to_cpu(ext_cur->ee_block); @@ -1272,7 +1204,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, if (le32_to_cpu(ext_cur->ee_block) > block_end) { ext4_debug("ext4 move extent: The specified range of file " "may be the hole\n"); - ret1 = -EINVAL; + ret = -EINVAL; goto out; } @@ -1292,7 +1224,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, last_extent = mext_next_extent(orig_inode, holecheck_path, &ext_cur); if (last_extent < 0) { - ret1 = last_extent; + ret = last_extent; break; } add_blocks = ext4_ext_get_actual_len(ext_cur); @@ -1349,18 +1281,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, orig_page_offset, data_offset_in_page, block_len_in_page, uninit, - &ret1); + &ret); /* Count how many blocks we have exchanged */ *moved_len += block_len_in_page; - if (ret1 < 0) + if (ret < 0) break; if (*moved_len > len) { EXT4_ERROR_INODE(orig_inode, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); - ret1 = -EIO; + ret = -EIO; break; } @@ -1374,22 +1306,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, } double_down_write_data_sem(orig_inode, donor_inode); - if (ret1 < 0) + if (ret < 0) break; /* Decrease buffer counter */ if (holecheck_path) ext4_ext_drop_refs(holecheck_path); - ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); - if (ret1) + ret = get_ext_path(orig_inode, seq_start, &holecheck_path); + if (ret) break; depth = holecheck_path->p_depth; /* Decrease buffer counter */ if (orig_path) ext4_ext_drop_refs(orig_path); - ret1 = get_ext_path(orig_inode, seq_start, &orig_path); - if (ret1) + ret = get_ext_path(orig_inode, seq_start, &orig_path); + if (ret) break; ext_cur = holecheck_path[depth].p_ext; @@ -1412,12 +1344,7 @@ out: kfree(holecheck_path); } double_up_write_data_sem(orig_inode, donor_inode); - ret2 = mext_inode_double_unlock(orig_inode, donor_inode); + mext_inode_double_unlock(orig_inode, donor_inode); - if (ret1) - return ret1; - else if (ret2) - return ret2; - - return 0; + return ret; } -- cgit v1.2.3 From f066055a3449f0e5b0ae4f3ceab4445bead47638 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 26 Sep 2012 12:32:54 -0400 Subject: ext4: online defrag is not supported for journaled files Proper block swap for inodes with full journaling enabled is truly non obvious task. In order to be on a safe side let's explicitly disable it for now. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/move_extent.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index df5cde5130c5..e2016f34b58f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1142,7 +1142,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - + /* TODO: This is non obvious task to swap blocks for inodes with full + jornaling enabled */ + if (ext4_should_journal_data(orig_inode) || + ext4_should_journal_data(donor_inode)) { + return -EINVAL; + } /* Protect orig and donor inodes against a truncate */ mext_inode_double_lock(orig_inode, donor_inode); -- cgit v1.2.3 From bb5574880574fea38c674942cf0360253a2d60fe Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 26 Sep 2012 12:52:07 -0400 Subject: ext4: clean up online defrag bugs in move_extent_per_page() Non-full list of bugs: 1) uninitialized extent optimization does not hold page's lock, and simply replace brunches after that writeback code goes crazy because block mapping changed under it's feets kernel BUG at fs/ext4/inode.c:1434! ( 288'th xfstress) 2) uninitialized extent may became initialized right after we drop i_data_sem, so extent state must be rechecked 3) Locked pages goes uptodate via following sequence: ->readpage(page); lock_page(page); use_that_page(page) But after readpage() one may invalidate it because it is uptodate and unlocked (reclaimer does that) As result kernel bug at include/linux/buffer_head.c:133! 4) We call write_begin() with already opened stansaction which result in following deadlock: ->move_extent_per_page() ->ext4_journal_start()-> hold journal transaction ->write_begin() ->ext4_da_write_begin() ->ext4_nonda_switch() ->writeback_inodes_sb_if_idle() --> will wait for journal_stop() 5) try_to_release_page() may fail and it does fail if one of page's bh was pinned by journal 6) If we about to change page's mapping we MUST hold it's lock during entire remapping procedure, this is true for both pages(original and donor one) Fixes: - Avoid (1) and (2) simply by temproraly drop uninitialized extent handling optimization, this will be reimplemented later. - Fix (3) by manually forcing page to uptodate state w/o dropping it's lock - Fix (4) by rearranging existing locking: from: journal_start(); ->write_begin to: write_begin(); journal_extend() - Fix (5) simply by checking retvalue - Fix (6) by locking both (original and donor one) pages during extent swap with help of mext_page_double_lock() Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 253 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 178 insertions(+), 75 deletions(-) (limited to 'fs') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index e2016f34b58f..c87a746450e5 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -735,6 +735,118 @@ out: return replaced_count; } +/** + * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 + * + * @inode1: the inode structure + * @inode2: the inode structure + * @index: page index + * @page: result page vector + * + * Grab two locked pages for inode's by inode order + */ +static int +mext_page_double_lock(struct inode *inode1, struct inode *inode2, + pgoff_t index, struct page *page[2]) +{ + struct address_space *mapping[2]; + unsigned fl = AOP_FLAG_NOFS; + + BUG_ON(!inode1 || !inode2); + if (inode1 < inode2) { + mapping[0] = inode1->i_mapping; + mapping[1] = inode2->i_mapping; + } else { + mapping[0] = inode2->i_mapping; + mapping[1] = inode1->i_mapping; + } + + page[0] = grab_cache_page_write_begin(mapping[0], index, fl); + if (!page[0]) + return -ENOMEM; + + page[1] = grab_cache_page_write_begin(mapping[1], index, fl); + if (!page[1]) { + unlock_page(page[0]); + page_cache_release(page[0]); + return -ENOMEM; + } + + if (inode1 > inode2) { + struct page *tmp; + tmp = page[0]; + page[0] = page[1]; + page[1] = tmp; + } + return 0; +} + +/* Force page buffers uptodate w/o dropping page's lock */ +static int +mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + sector_t block; + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; + unsigned int blocksize, block_start, block_end; + int i, err, nr = 0, partial = 0; + BUG_ON(!PageLocked(page)); + BUG_ON(PageWriteback(page)); + + if (PageUptodate(page)) + return 0; + + blocksize = 1 << inode->i_blkbits; + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + + head = page_buffers(page); + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + for (bh = head, block_start = 0; bh != head || !block_start; + block++, block_start = block_end, bh = bh->b_this_page) { + block_end = block_start + blocksize; + if (block_end <= from || block_start >= to) { + if (!buffer_uptodate(bh)) + partial = 1; + continue; + } + if (buffer_uptodate(bh)) + continue; + if (!buffer_mapped(bh)) { + int err = 0; + err = ext4_get_block(inode, block, bh, 0); + if (err) { + SetPageError(page); + return err; + } + if (!buffer_mapped(bh)) { + zero_user(page, block_start, blocksize); + if (!err) + set_buffer_uptodate(bh); + continue; + } + } + BUG_ON(nr >= MAX_BUF_PER_PAGE); + arr[nr++] = bh; + } + /* No io required */ + if (!nr) + goto out; + + for (i = 0; i < nr; i++) { + bh = arr[i]; + if (!bh_uptodate_or_lock(bh)) { + err = bh_submit_read(bh); + if (err) + return err; + } + } +out: + if (!partial) + SetPageUptodate(page); + return 0; +} + /** * move_extent_per_page - Move extent data per page * @@ -757,26 +869,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, int block_len_in_page, int uninit, int *err) { struct inode *orig_inode = o_filp->f_dentry->d_inode; - struct address_space *mapping = orig_inode->i_mapping; - struct buffer_head *bh; - struct page *page = NULL; - const struct address_space_operations *a_ops = mapping->a_ops; + struct page *pagep[2] = {NULL, NULL}; handle_t *handle; ext4_lblk_t orig_blk_offset; long long offs = orig_page_offset << PAGE_CACHE_SHIFT; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int w_flags = 0; unsigned int tmp_data_size, data_size, replaced_size; - void *fsdata; - int i, jblocks; - int err2 = 0; + int err2, jblocks, retries = 0; int replaced_count = 0; + int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; /* * It needs twice the amount of ordinary journal buffers because * inode and donor_inode may change each different metadata blocks. */ +again: + *err = 0; jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; handle = ext4_journal_start(orig_inode, jblocks); if (IS_ERR(handle)) { @@ -790,19 +900,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, orig_blk_offset = orig_page_offset * blocks_per_page + data_offset_in_page; - /* - * If orig extent is uninitialized one, - * it's not necessary force the page into memory - * and then force it to be written out again. - * Just swap data blocks between orig and donor. - */ - if (uninit) { - replaced_count = mext_replace_branches(handle, orig_inode, - donor_inode, orig_blk_offset, - block_len_in_page, err); - goto out2; - } - offs = (long long)orig_blk_offset << orig_inode->i_blkbits; /* Calculate data_size */ @@ -824,75 +921,81 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, replaced_size = data_size; - *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, - &page, &fsdata); + *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, + pagep); if (unlikely(*err < 0)) - goto out; + goto stop_journal; - if (!PageUptodate(page)) { - mapping->a_ops->readpage(o_filp, page); - lock_page(page); + *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); + if (*err) + goto unlock_pages; + + /* At this point all buffers in range are uptodate, old mapping layout + * is no longer required, try to drop it now. */ + if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) || + (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) { + *err = -EBUSY; + goto unlock_pages; } - /* - * try_to_release_page() doesn't call releasepage in writeback mode. - * We should care about the order of writing to the same file - * by multiple move extent processes. - * It needs to call wait_on_page_writeback() to wait for the - * writeback of the page. - */ - wait_on_page_writeback(page); - - /* Release old bh and drop refs */ - try_to_release_page(page, 0); - replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, - orig_blk_offset, block_len_in_page, - &err2); - if (err2) { + orig_blk_offset, + block_len_in_page, err); + if (*err) { if (replaced_count) { block_len_in_page = replaced_count; replaced_size = block_len_in_page << orig_inode->i_blkbits; } else - goto out; + goto unlock_pages; } + /* Perform all necessary steps similar write_begin()/write_end() + * but keeping in mind that i_size will not change */ + *err = __block_write_begin(pagep[0], from, from + replaced_size, + ext4_get_block); + if (!*err) + *err = block_commit_write(pagep[0], from, from + replaced_size); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); - - bh = page_buffers(page); - for (i = 0; i < data_offset_in_page; i++) - bh = bh->b_this_page; - - for (i = 0; i < block_len_in_page; i++) { - *err = ext4_get_block(orig_inode, - (sector_t)(orig_blk_offset + i), bh, 0); - if (*err < 0) - goto out; - - if (bh->b_this_page != NULL) - bh = bh->b_this_page; - } - - *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, - page, fsdata); - page = NULL; - -out: - if (unlikely(page)) { - if (PageLocked(page)) - unlock_page(page); - page_cache_release(page); - ext4_journal_stop(handle); - } -out2: + if (unlikely(*err < 0)) + goto repair_branches; + + /* Even in case of data=writeback it is reasonable to pin + * inode to transaction, to prevent unexpected data loss */ + *err = ext4_jbd2_file_inode(handle, orig_inode); + +unlock_pages: + unlock_page(pagep[0]); + page_cache_release(pagep[0]); + unlock_page(pagep[1]); + page_cache_release(pagep[1]); +stop_journal: ext4_journal_stop(handle); - - if (err2) - *err = err2; - + /* Buffer was busy because probably is pinned to journal transaction, + * force transaction commit may help to free it. */ + if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, + &retries)) + goto again; return replaced_count; + +repair_branches: + /* + * This should never ever happen! + * Extents are swapped already, but we are not able to copy data. + * Try to swap extents to it's original places + */ + double_down_write_data_sem(orig_inode, donor_inode); + replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, + orig_blk_offset, + block_len_in_page, &err2); + double_up_write_data_sem(orig_inode, donor_inode); + if (replaced_count != block_len_in_page) { + EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), + "Unable to copy data block," + " data will be lost."); + *err = -EIO; + } + replaced_count = 0; + goto unlock_pages; } /** -- cgit v1.2.3 From 8c85447391735469f407add6fdb0630ce59d7f6d Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 26 Sep 2012 12:54:52 -0400 Subject: ext4: reimplement uninit extent optimization for move_extent_per_page() Uninitialized extent may became initialized(parallel writeback task) at any moment after we drop i_data_sem, so we have to recheck extent's state after we hold page's lock and i_data_sem. If we about to change page's mapping we must hold page's lock in order to serialize other users. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c87a746450e5..c2e47da7c2ba 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -594,6 +594,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, return 0; } +/** + * mext_check_coverage - Check that all extents in range has the same type + * + * @inode: inode in question + * @from: block offset of inode + * @count: block count to be checked + * @uninit: extents expected to be uninitialized + * @err: pointer to save error value + * + * Return 1 if all extents in range has expected type, and zero otherwise. + */ +static int +mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, + int uninit, int *err) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent *ext; + ext4_lblk_t last = from + count; + while (from < last) { + *err = get_ext_path(inode, from, &path); + if (*err) + return 0; + ext = path[ext_depth(inode)].p_ext; + if (!ext) { + ext4_ext_drop_refs(path); + return 0; + } + if (uninit != ext4_ext_is_uninitialized(ext)) { + ext4_ext_drop_refs(path); + return 0; + } + from += ext4_ext_get_actual_len(ext); + ext4_ext_drop_refs(path); + } + return 1; +} + /** * mext_replace_branches - Replace original extents with new extents * @@ -629,9 +666,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, int replaced_count = 0; int dext_alen; - /* Protect extent trees against block allocations via delalloc */ - double_down_write_data_sem(orig_inode, donor_inode); - /* Get the original extent for the block "orig_off" */ *err = get_ext_path(orig_inode, orig_off, &orig_path); if (*err) @@ -730,8 +764,6 @@ out: ext4_ext_invalidate_cache(orig_inode); ext4_ext_invalidate_cache(donor_inode); - double_up_write_data_sem(orig_inode, donor_inode); - return replaced_count; } @@ -925,7 +957,46 @@ again: pagep); if (unlikely(*err < 0)) goto stop_journal; + /* + * If orig extent was uninitialized it can become initialized + * at any time after i_data_sem was dropped, in order to + * serialize with delalloc we have recheck extent while we + * hold page's lock, if it is still the case data copy is not + * necessary, just swap data blocks between orig and donor. + */ + if (uninit) { + double_down_write_data_sem(orig_inode, donor_inode); + /* If any of extents in range became initialized we have to + * fallback to data copying */ + uninit = mext_check_coverage(orig_inode, orig_blk_offset, + block_len_in_page, 1, err); + if (*err) + goto drop_data_sem; + uninit &= mext_check_coverage(donor_inode, orig_blk_offset, + block_len_in_page, 1, err); + if (*err) + goto drop_data_sem; + + if (!uninit) { + double_up_write_data_sem(orig_inode, donor_inode); + goto data_copy; + } + if ((page_has_private(pagep[0]) && + !try_to_release_page(pagep[0], 0)) || + (page_has_private(pagep[1]) && + !try_to_release_page(pagep[1], 0))) { + *err = -EBUSY; + goto drop_data_sem; + } + replaced_count = mext_replace_branches(handle, orig_inode, + donor_inode, orig_blk_offset, + block_len_in_page, err); + drop_data_sem: + double_up_write_data_sem(orig_inode, donor_inode); + goto unlock_pages; + } +data_copy: *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); if (*err) goto unlock_pages; -- cgit v1.2.3 From 4f2b86aba87a2654a1258ffe09c22ce70ab69d60 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Sep 2012 09:33:40 -0400 Subject: cifs: change DOS/NT/POSIX mapping of ERRnoresource ERRnoresource is an ERRSRV level (aka server-side) error and means "No resources currently available for request". Currently that maps to POSIX -ENOBUFS. No NT errors map to it currently. NT_STATUS_INSUFFICIENT_RESOURCES and NT_STATUS_INSUFF_SERVER_RESOURCES are also similar in meaning. Currently the client maps those to ERRnomem, which maps to -ENOMEM in POSIX. All of these mappings seem to be quite wrong to me and are confusing for users. All of the above errors indicate problems on the server, not the client. Reporting -ENOMEM or -ENOBUFS implies that the client is running out of resources. This patch changes those mappings. The NT_* errors are changed to map to the SRV level ERRnoresource. That error is in turn changed to return -EREMOTEIO which is the only POSIX error I could find that conveys that something went wrong on the server. While we're at it, change the SMB2 equivalent error to return the same. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/netmisc.c | 6 +++--- fs/cifs/smb2maperror.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index e7bab3be5cf9..d5ce9e26696c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -110,7 +110,7 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = { {ERRnoroom, -ENOSPC}, {ERRrmuns, -EUSERS}, {ERRtimeout, -ETIME}, - {ERRnoresource, -ENOBUFS}, + {ERRnoresource, -EREMOTEIO}, {ERRtoomanyuids, -EUSERS}, {ERRbaduid, -EACCES}, {ERRusempx, -EIO}, @@ -412,7 +412,7 @@ static const struct { from NT_STATUS_INSUFFICIENT_RESOURCES to NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */ { - ERRDOS, ERRnomem, NT_STATUS_INSUFFICIENT_RESOURCES}, { + ERRDOS, ERRnoresource, NT_STATUS_INSUFFICIENT_RESOURCES}, { ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, { ERRDOS, 23, NT_STATUS_DEVICE_DATA_ERROR}, { ERRHRD, ERRgeneral, NT_STATUS_DEVICE_NOT_CONNECTED}, { @@ -682,7 +682,7 @@ static const struct { ERRHRD, ERRgeneral, NT_STATUS_NO_USER_SESSION_KEY}, { ERRDOS, 59, NT_STATUS_USER_SESSION_DELETED}, { ERRHRD, ERRgeneral, NT_STATUS_RESOURCE_LANG_NOT_FOUND}, { - ERRDOS, ERRnomem, NT_STATUS_INSUFF_SERVER_RESOURCES}, { + ERRDOS, ERRnoresource, NT_STATUS_INSUFF_SERVER_RESOURCES}, { ERRHRD, ERRgeneral, NT_STATUS_INVALID_BUFFER_SIZE}, { ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_COMPONENT}, { ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_WILDCARD}, { diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index eaf5466d4041..494c912c76fe 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -453,7 +453,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, "STATUS_ALLOTTED_SPACE_EXCEEDED"}, - {STATUS_INSUFFICIENT_RESOURCES, -EIO, "STATUS_INSUFFICIENT_RESOURCES"}, + {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO, + "STATUS_INSUFFICIENT_RESOURCES"}, {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, {STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"}, -- cgit v1.2.3 From c3a58fecdd1934a8538ada9073107625f5151687 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Fri, 17 Aug 2012 18:19:38 -0300 Subject: Make inode64 a remountable option Actually, there is no reason about why a user must umount and mount a XFS filesystem to enable 'inode64' option. So, this patch makes this a remountable option. Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19e2380fb867..c416a01fcb14 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -120,12 +120,13 @@ mempool_t *xfs_ioend_pool; * in the future, too. */ enum { - Opt_barrier, Opt_nobarrier, Opt_err + Opt_barrier, Opt_nobarrier, Opt_inode64, Opt_err }; static const match_table_t tokens = { {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, + {Opt_inode64, "inode64"}, {Opt_err, NULL} }; @@ -1031,6 +1032,30 @@ xfs_restore_resvblks(struct xfs_mount *mp) xfs_reserve_blocks(mp, &resblks, NULL); } +STATIC void +xfs_set_inode64(struct xfs_mount *mp) +{ + int i = 0; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, i); + pag->pagi_inodeok = 1; + pag->pagf_metadata = 0; + xfs_perag_put(pag); + } + + /* There is no need for lock protection on m_flags, + * the rw_semaphore of the VFS superblock is locked + * during mount/umount/remount operations, so this is + * enough to avoid concurency on the m_flags field + */ + mp->m_flags &= ~(XFS_MOUNT_32BITINODES | + XFS_MOUNT_SMALL_INUMS); + mp->m_maxagi = i; +} + STATIC int xfs_fs_remount( struct super_block *sb, @@ -1056,6 +1081,9 @@ xfs_fs_remount( case Opt_nobarrier: mp->m_flags &= ~XFS_MOUNT_BARRIER; break; + case Opt_inode64: + xfs_set_inode64(mp); + break; default: /* * Logically we would return an error here to prevent -- cgit v1.2.3 From 8aea3ff411b2ce8fe7b46644298ed243a920eb24 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 20 Sep 2012 10:32:36 -0300 Subject: xfs: Fix m_agirotor reset during AG selection xfs_ialloc_next_ag() currently resets m_agirotor when it is equal to m_maxagi: if (++mp->m_agirotor == mp->m_maxagi) mp->m_agirotor = 0; But, if for some reason mp->m_maxagi changes to a lower value than current m_agirotor, this condition will never be true, causing m_agirotor to exceed the maximum allowed value (m_maxagi). This implies mainly during lookups for xfs_perag structs in its radix tree, since the agno value used for the lookup is based on m_agirotor. An out-of-range m_agirotor may cause a lookup failure which in case will return NULL. As an example, the value of m_maxagi is decreased during inode64->inode32 remount process, case where I've found this problem. Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5aceb3f8ecd6..445bf1aef31c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -431,7 +431,7 @@ xfs_ialloc_next_ag( spin_lock(&mp->m_agirotor_lock); agno = mp->m_agirotor; - if (++mp->m_agirotor == mp->m_maxagi) + if (++mp->m_agirotor >= mp->m_maxagi) mp->m_agirotor = 0; spin_unlock(&mp->m_agirotor_lock); -- cgit v1.2.3 From 08bf540412ed82a15cb9068249ad49b410a7b082 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 20 Sep 2012 10:32:37 -0300 Subject: xfs: make inode64 as the default allocation mode since 64-bit inodes can be accessed while using inode32, and these can also be used on 32-bit kernels, there is no reason to still keep inode32 as the default mount option. If the filesystem cannot handle 64bit inode numbers (i.e CONFIG_LBDAF is not enabled and BITS_PER_LONG == 32), XFS_MOUNT_SMALL_INUMS will still be set by default, so inode64 is not an unconditional default value. Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c416a01fcb14..996257d36fd1 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool; * unwritten extent conversion */ #define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ +#define MNTOPT_32BITINODE "inode32" /* inode allocation limited to + * XFS_MAXINUMBER_32 */ #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ #define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ @@ -198,7 +200,9 @@ xfs_parseargs( */ mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; +#if !XFS_BIG_INUMS mp->m_flags |= XFS_MOUNT_SMALL_INUMS; +#endif /* * These can be overridden by the mount option parsing. @@ -295,6 +299,8 @@ xfs_parseargs( return EINVAL; } dswidth = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; #if !XFS_BIG_INUMS @@ -493,6 +499,7 @@ xfs_showargs( { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, + { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE }, { 0, NULL } }; static struct proc_xfs_info xfs_info_unset[] = { -- cgit v1.2.3 From 2d2194f61fddab3a9731b6e7a7ae3a4a19dd810c Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 20 Sep 2012 10:32:38 -0300 Subject: xfs: reduce code duplication handling inode32/64 options Add xfs_set_inode32() to be used to enable inode32 allocation mode. this will reduce the amount of duplicated code needed to mount/remount a filesystem with inode32 option. This patch also changes xfs_set_inode64() to return the maximum AG number that inodes can be allocated instead of set mp->m_maxagi by itself, so that the behaviour is the same as xfs_set_inode32(). This simplifies code that calls these functions and needs to know the maximum AG that inodes can be allocated in. Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_mount.c | 43 +++----------------------- fs/xfs/xfs_super.c | 89 +++++++++++++++++++++++++++++++++++++++--------------- fs/xfs/xfs_super.h | 2 ++ 3 files changed, 72 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 29c2f83d4147..b2bd3a0e6376 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -440,7 +440,7 @@ xfs_initialize_perag( xfs_agnumber_t agcount, xfs_agnumber_t *maxagi) { - xfs_agnumber_t index, max_metadata; + xfs_agnumber_t index; xfs_agnumber_t first_initialised = 0; xfs_perag_t *pag; xfs_agino_t agino; @@ -500,43 +500,10 @@ xfs_initialize_perag( else mp->m_flags &= ~XFS_MOUNT_32BITINODES; - if (mp->m_flags & XFS_MOUNT_32BITINODES) { - /* - * Calculate how much should be reserved for inodes to meet - * the max inode percentage. - */ - if (mp->m_maxicount) { - __uint64_t icount; - - icount = sbp->sb_dblocks * sbp->sb_imax_pct; - do_div(icount, 100); - icount += sbp->sb_agblocks - 1; - do_div(icount, sbp->sb_agblocks); - max_metadata = icount; - } else { - max_metadata = agcount; - } - - for (index = 0; index < agcount; index++) { - ino = XFS_AGINO_TO_INO(mp, index, agino); - if (ino > XFS_MAXINUMBER_32) { - index++; - break; - } - - pag = xfs_perag_get(mp, index); - pag->pagi_inodeok = 1; - if (index < max_metadata) - pag->pagf_metadata = 1; - xfs_perag_put(pag); - } - } else { - for (index = 0; index < agcount; index++) { - pag = xfs_perag_get(mp, index); - pag->pagi_inodeok = 1; - xfs_perag_put(pag); - } - } + if (mp->m_flags & XFS_MOUNT_32BITINODES) + index = xfs_set_inode32(mp); + else + index = xfs_set_inode64(mp); if (maxagi) *maxagi = index; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 996257d36fd1..d6619d685531 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -599,6 +599,71 @@ xfs_max_file_offset( return (((__uint64_t)pagefactor) << bitshift) - 1; } +xfs_agnumber_t +xfs_set_inode32(struct xfs_mount *mp) +{ + xfs_agnumber_t index = 0; + xfs_sb_t *sbp = &mp->m_sb; + xfs_agnumber_t max_metadata; + xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0); + xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino); + xfs_perag_t *pag; + + /* Calculate how much should be reserved for inodes to meet + * the max inode percentage. + */ + if (mp->m_maxicount) { + __uint64_t icount; + + icount = sbp->sb_dblocks * sbp->sb_imax_pct; + do_div(icount, 100); + icount += sbp->sb_agblocks - 1; + do_div(icount, sbp->sb_agblocks); + max_metadata = icount; + } else { + max_metadata = sbp->sb_agcount; + } + + for (index = 0; index < sbp->sb_agcount; index++) { + ino = XFS_AGINO_TO_INO(mp, index, agino); + if (ino > XFS_MAXINUMBER_32) { + index++; + break; + } + + pag = xfs_perag_get(mp, index); + pag->pagi_inodeok = 1; + if (index < max_metadata) + pag->pagf_metadata = 1; + xfs_perag_put(pag); + } + return index; +} + +xfs_agnumber_t +xfs_set_inode64(struct xfs_mount *mp) +{ + xfs_agnumber_t index = 0; + + for (index = 0; index < mp->m_sb.sb_agcount; index++) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, index); + pag->pagi_inodeok = 1; + pag->pagf_metadata = 0; + xfs_perag_put(pag); + } + + /* There is no need for lock protection on m_flags, + * the rw_semaphore of the VFS superblock is locked + * during mount/umount/remount operations, so this is + * enough to avoid concurency on the m_flags field + */ + mp->m_flags &= ~(XFS_MOUNT_32BITINODES | + XFS_MOUNT_SMALL_INUMS); + return index; +} + STATIC int xfs_blkdev_get( xfs_mount_t *mp, @@ -1039,30 +1104,6 @@ xfs_restore_resvblks(struct xfs_mount *mp) xfs_reserve_blocks(mp, &resblks, NULL); } -STATIC void -xfs_set_inode64(struct xfs_mount *mp) -{ - int i = 0; - - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, i); - pag->pagi_inodeok = 1; - pag->pagf_metadata = 0; - xfs_perag_put(pag); - } - - /* There is no need for lock protection on m_flags, - * the rw_semaphore of the VFS superblock is locked - * during mount/umount/remount operations, so this is - * enough to avoid concurency on the m_flags field - */ - mp->m_flags &= ~(XFS_MOUNT_32BITINODES | - XFS_MOUNT_SMALL_INUMS); - mp->m_maxagi = i; -} - STATIC int xfs_fs_remount( struct super_block *sb, diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 09b0c26b2245..9de4a920ba05 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -75,6 +75,8 @@ struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); +extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *); +extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *); extern const struct export_operations xfs_export_operations; extern const struct xattr_handler *xfs_xattr_handlers[]; -- cgit v1.2.3 From 4c0837224c677db35cd85b04a77504c496cadb66 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 20 Sep 2012 10:32:39 -0300 Subject: xfs: Fix mp->m_maxagi update during inode64 remount With the changes made on xfs_set_inode64(), to make it behave as xfs_set_inode32() (now leaving to the caller the responsibility to update mp->m_maxagi), we use the return value of xfs_set_inode64() to update mp->m_maxagi during remount. Signed-off-by: Carlos Maiolino Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d6619d685531..aeb03f9a8967 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1130,7 +1130,7 @@ xfs_fs_remount( mp->m_flags &= ~XFS_MOUNT_BARRIER; break; case Opt_inode64: - xfs_set_inode64(mp); + mp->m_maxagi = xfs_set_inode64(mp); break; default: /* -- cgit v1.2.3 From 4056c1d08d2a7c50ae7414db7c1783ba45b4835d Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 20 Sep 2012 10:32:40 -0300 Subject: xfs: add inode64->inode32 transition into xfs_set_inode32() To make inode32 a remountable option, xfs_set_inode32() should be able to make a transition from inode64 option, disabling inode allocation on higher AGs. Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index aeb03f9a8967..168d4984ce89 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -603,6 +603,7 @@ xfs_agnumber_t xfs_set_inode32(struct xfs_mount *mp) { xfs_agnumber_t index = 0; + xfs_agnumber_t maxagi = 0; xfs_sb_t *sbp = &mp->m_sb; xfs_agnumber_t max_metadata; xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0); @@ -626,18 +627,26 @@ xfs_set_inode32(struct xfs_mount *mp) for (index = 0; index < sbp->sb_agcount; index++) { ino = XFS_AGINO_TO_INO(mp, index, agino); + if (ino > XFS_MAXINUMBER_32) { - index++; - break; + pag = xfs_perag_get(mp, index); + pag->pagi_inodeok = 0; + pag->pagf_metadata = 0; + xfs_perag_put(pag); + continue; } pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; + maxagi++; if (index < max_metadata) pag->pagf_metadata = 1; xfs_perag_put(pag); } - return index; + mp->m_flags |= (XFS_MOUNT_32BITINODES | + XFS_MOUNT_SMALL_INUMS); + + return maxagi; } xfs_agnumber_t -- cgit v1.2.3 From 2ea0392983a82f7dc3055568ae0f2558724d119b Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 20 Sep 2012 10:32:41 -0300 Subject: xfs: Make inode32 a remountable option As inode64 is the default option now, and was also made remountable previously, inode32 can also be remounted on-the-fly when it is needed. Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 168d4984ce89..d93f2c7364cc 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -122,13 +122,18 @@ mempool_t *xfs_ioend_pool; * in the future, too. */ enum { - Opt_barrier, Opt_nobarrier, Opt_inode64, Opt_err + Opt_barrier, + Opt_nobarrier, + Opt_inode64, + Opt_inode32, + Opt_err }; static const match_table_t tokens = { {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_inode64, "inode64"}, + {Opt_inode32, "inode32"}, {Opt_err, NULL} }; @@ -1141,6 +1146,9 @@ xfs_fs_remount( case Opt_inode64: mp->m_maxagi = xfs_set_inode64(mp); break; + case Opt_inode32: + mp->m_maxagi = xfs_set_inode32(mp); + break; default: /* * Logically we would return an error here to prevent -- cgit v1.2.3 From 85556c9a503ababa889bd7bb7a9b3effba795d00 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 26 Sep 2012 20:43:37 -0400 Subject: ext4: use kmem_cache_zalloc instead of kmem_cache_alloc/memset Using kmem_cache_zalloc() instead of kmem_cache_alloc() and memset(). spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bb821a924049..8ec6f88b7a37 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2232,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); - meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); + meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL); if (meta_group_info[i] == NULL) { ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); goto exit_group_info; } - memset(meta_group_info[i], 0, kmem_cache_size(cachep)); set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(meta_group_info[i]->bb_state)); @@ -4010,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ - memset(ac, 0, sizeof(struct ext4_allocation_context)); ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; @@ -4293,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } } - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS); if (!ac) { ar->len = 0; *errp = -ENOMEM; -- cgit v1.2.3 From 1a7bd2265fc57f29400d57f66275cc5918e30aa6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 17:18:05 -0400 Subject: make get_unused_fd_flags() a function ... and get_unused_fd() a macro around it Signed-off-by: Al Viro --- fs/file.c | 6 +++--- include/linux/file.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index ba3f6053025c..5b46e9970a7a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -477,8 +477,8 @@ out: return error; } -int get_unused_fd(void) +int get_unused_fd_flags(unsigned flags) { - return alloc_fd(0, 0); + return alloc_fd(0, flags); } -EXPORT_SYMBOL(get_unused_fd); +EXPORT_SYMBOL(get_unused_fd_flags); diff --git a/include/linux/file.h b/include/linux/file.h index a22408bac0d0..86795ec6027d 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -33,8 +33,8 @@ extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); extern void set_close_on_exec(unsigned int fd, int flag); extern void put_filp(struct file *); extern int alloc_fd(unsigned start, unsigned flags); -extern int get_unused_fd(void); -#define get_unused_fd_flags(flags) alloc_fd(0, (flags)) +extern int get_unused_fd_flags(unsigned flags); +#define get_unused_fd() get_unused_fd_flags(0) extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); -- cgit v1.2.3 From c921b40d6201f7ec7b1edf7ea9a844f93e1a27f4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 18:04:37 -0400 Subject: autofs4: don't open-code fd_install() The only difference between autofs_dev_ioctl_fd_install() and fd_install() is __set_close_on_exec() done by the latter. Just use get_unused_fd_flags(O_CLOEXEC) to allocate the descriptor and be done with that... Signed-off-by: Al Viro --- fs/autofs4/dev-ioctl.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index abf645c1703b..a16214109d31 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p) return ino && ino->sbi->type & *(unsigned *)p; } -static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - __set_close_on_exec(fd, fdt); - spin_unlock(&files->file_lock); -} - - /* * Open a file descriptor on the autofs mount point corresponding * to the given path and device number (aka. new_encode_dev(sb->s_dev)). @@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) { int err, fd; - fd = get_unused_fd(); + fd = get_unused_fd_flags(O_CLOEXEC); if (likely(fd >= 0)) { struct file *filp; struct path path; @@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) goto out; } - autofs_dev_ioctl_fd_install(fd, filp); + fd_install(fd, filp); } return fd; -- cgit v1.2.3 From 5b249b1b07c42c61d0c53b8ab4fad026e39a9be9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:17:29 -0400 Subject: pipe(2) - race-free error recovery don't mess with sys_close() if copy_to_user() fails; just postpone fd_install() until we know it hasn't. Signed-off-by: Al Viro --- fs/pipe.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 8d85d7068c1e..bd3479db4b62 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1064,9 +1064,8 @@ err_inode: return err; } -int do_pipe_flags(int *fd, int flags) +static int __do_pipe_flags(int *fd, struct file **files, int flags) { - struct file *files[2]; int error; int fdw, fdr; @@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags) fdw = error; audit_fd_pair(fdr, fdw); - fd_install(fdr, files[0]); - fd_install(fdw, files[1]); fd[0] = fdr; fd[1] = fdw; - return 0; err_fdr: @@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags) return error; } +int do_pipe_flags(int *fd, int flags) +{ + struct file *files[2]; + int error = __do_pipe_flags(fd, files, flags); + if (!error) { + fd_install(fd[0], files[0]); + fd_install(fd[1], files[1]); + } + return error; +} + /* * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. */ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) { + struct file *files[2]; int fd[2]; int error; - error = do_pipe_flags(fd, flags); + error = __do_pipe_flags(fd, files, flags); if (!error) { - if (copy_to_user(fildes, fd, sizeof(fd))) { - sys_close(fd[0]); - sys_close(fd[1]); + if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { + fput(files[0]); + fput(files[1]); + put_unused_fd(fd[0]); + put_unused_fd(fd[1]); error = -EFAULT; + } else { + fd_install(fd[0], files[0]); + fd_install(fd[1], files[1]); } } return error; -- cgit v1.2.3 From 352e3b249284235e00745f3e71fc348b913e5deb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:30:45 -0400 Subject: fanotify: sanitize failure exits in copy_event_to_user() * do copy_to_user() before prepare_for_access_response(); that kills the need in remove_access_response(). * don't do fd_install() until we are past the last possible failure exit. Don't use sys_close() on cleanup side - just put_unused_fd() and fput(). Less racy that way... Signed-off-by: Al Viro --- fs/notify/fanotify/fanotify_user.c | 59 +++++++++++++------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index d43803669739..ea48693940f1 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, return fsnotify_remove_notify_event(group); } -static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) +static int create_fd(struct fsnotify_group *group, + struct fsnotify_event *event, + struct file **file) { int client_fd; struct file *new_file; @@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); } else { - fd_install(client_fd, new_file); + *file = new_file; } return client_fd; @@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) static int fill_event_metadata(struct fsnotify_group *group, struct fanotify_event_metadata *metadata, - struct fsnotify_event *event) + struct fsnotify_event *event, + struct file **file) { int ret = 0; pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group, metadata, event); + *file = NULL; metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; @@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group, if (unlikely(event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { - metadata->fd = create_fd(group, event); + metadata->fd = create_fd(group, event, file); if (metadata->fd < 0) ret = metadata->fd; } @@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, return 0; } -static void remove_access_response(struct fsnotify_group *group, - struct fsnotify_event *event, - __s32 fd) -{ - struct fanotify_response_event *re; - - if (!(event->mask & FAN_ALL_PERM_EVENTS)) - return; - - re = dequeue_re(group, fd); - if (!re) - return; - - BUG_ON(re->event != event); - - kmem_cache_free(fanotify_response_event_cache, re); - - return; -} #else static int prepare_for_access_response(struct fsnotify_group *group, struct fsnotify_event *event, @@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, return 0; } -static void remove_access_response(struct fsnotify_group *group, - struct fsnotify_event *event, - __s32 fd) -{ - return; -} #endif static ssize_t copy_event_to_user(struct fsnotify_group *group, @@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, char __user *buf) { struct fanotify_event_metadata fanotify_event_metadata; + struct file *f; int fd, ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); - ret = fill_event_metadata(group, &fanotify_event_metadata, event); + ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); if (ret < 0) goto out; fd = fanotify_event_metadata.fd; - ret = prepare_for_access_response(group, event, fd); - if (ret) - goto out_close_fd; - ret = -EFAULT; if (copy_to_user(buf, &fanotify_event_metadata, fanotify_event_metadata.event_len)) - goto out_kill_access_response; + goto out_close_fd; + + ret = prepare_for_access_response(group, event, fd); + if (ret) + goto out_close_fd; + fd_install(fd, f); return fanotify_event_metadata.event_len; -out_kill_access_response: - remove_access_response(group, event, fd); out_close_fd: - if (fd != FAN_NOFD) - sys_close(fd); + if (fd != FAN_NOFD) { + put_unused_fd(fd); + fput(f); + } out: #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (event->mask & FAN_ALL_PERM_EVENTS) { -- cgit v1.2.3 From f33ff9927f42045116d738ee47ff7bc59f739bd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 16:17:59 -0400 Subject: take rlimit check to callers of expand_files() ... except for one in android, where the check is different and already done in caller. No need to recalculate rlimit many times in alloc_fd() either. Signed-off-by: Al Viro --- fs/fcntl.c | 3 +++ fs/file.c | 16 +++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 887b5ba8c9b5..08e6af5c1b1f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -64,6 +64,9 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) if (unlikely(oldfd == newfd)) return -EINVAL; + if (newfd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + spin_lock(&files->file_lock); err = expand_files(files, newfd); file = fcheck(oldfd); diff --git a/fs/file.c b/fs/file.c index 5b46e9970a7a..08922af4a62c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -251,13 +251,6 @@ int expand_files(struct files_struct *files, int nr) fdt = files_fdtable(files); - /* - * N.B. For clone tasks sharing a files structure, this test - * will limit the total number of files that can be opened. - */ - if (nr >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; - /* Do we need to expand? */ if (nr < fdt->max_fds) return 0; @@ -431,6 +424,7 @@ int alloc_fd(unsigned start, unsigned flags) { struct files_struct *files = current->files; unsigned int fd; + unsigned end = rlimit(RLIMIT_NOFILE); int error; struct fdtable *fdt; @@ -444,6 +438,14 @@ repeat: if (fd < fdt->max_fds) fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); + /* + * N.B. For clone tasks sharing a files structure, this test + * will limit the total number of files that can be opened. + */ + error = -EMFILE; + if (fd >= end) + goto out; + error = expand_files(files, fd); if (error < 0) goto out; -- cgit v1.2.3 From dcfadfa4ec5a12404a99ad6426871a6b03a62b37 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 17:27:30 -0400 Subject: new helper: __alloc_fd() Essentially, alloc_fd() in a files_struct we own a reference to. Most of the time wanting to use it is a sign of lousy API design (such as android/binder). It's *not* a general-purpose interface; better that than open-coding its guts, but again, playing with other process' descriptor table is a sign of bad design. Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 59 ++++------------------------------------ fs/file.c | 12 +++++--- include/linux/fdtable.h | 3 ++ 3 files changed, 16 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index b9a534c46aac..4946d282a35c 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -362,71 +362,22 @@ struct binder_transaction { static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); -/* - * copied from get_unused_fd_flags - */ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { struct files_struct *files = proc->files; - int fd, error; - struct fdtable *fdt; unsigned long rlim_cur; unsigned long irqs; if (files == NULL) return -ESRCH; - error = -EMFILE; - spin_lock(&files->file_lock); + if (!lock_task_sighand(proc->tsk, &irqs)) + return -EMFILE; -repeat: - fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd); - - /* - * N.B. For clone tasks sharing a files structure, this test - * will limit the total number of files that can be opened. - */ - rlim_cur = 0; - if (lock_task_sighand(proc->tsk, &irqs)) { - rlim_cur = proc->tsk->signal->rlim[RLIMIT_NOFILE].rlim_cur; - unlock_task_sighand(proc->tsk, &irqs); - } - if (fd >= rlim_cur) - goto out; - - /* Do we need to expand the fd array or fd set? */ - error = expand_files(files, fd); - if (error < 0) - goto out; - - if (error) { - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - error = -EMFILE; - goto repeat; - } - - __set_open_fd(fd, fdt); - if (flags & O_CLOEXEC) - __set_close_on_exec(fd, fdt); - else - __clear_close_on_exec(fd, fdt); - files->next_fd = fd + 1; -#if 1 - /* Sanity check */ - if (fdt->fd[fd] != NULL) { - pr_warn("get_unused_fd: slot %d not NULL!\n", fd); - fdt->fd[fd] = NULL; - } -#endif - error = fd; + rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); + unlock_task_sighand(proc->tsk, &irqs); -out: - spin_unlock(&files->file_lock); - return error; + return __alloc_fd(files, 0, rlim_cur, flags); } /* diff --git a/fs/file.c b/fs/file.c index 08922af4a62c..a3a0705f8f51 100644 --- a/fs/file.c +++ b/fs/file.c @@ -420,11 +420,10 @@ struct files_struct init_files = { /* * allocate a file descriptor, mark it busy. */ -int alloc_fd(unsigned start, unsigned flags) +int __alloc_fd(struct files_struct *files, + unsigned start, unsigned end, unsigned flags) { - struct files_struct *files = current->files; unsigned int fd; - unsigned end = rlimit(RLIMIT_NOFILE); int error; struct fdtable *fdt; @@ -479,8 +478,13 @@ out: return error; } +int alloc_fd(unsigned start, unsigned flags) +{ + return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); +} + int get_unused_fd_flags(unsigned flags) { - return alloc_fd(0, flags); + return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); } EXPORT_SYMBOL(get_unused_fd_flags); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 158a41eed314..b84ca064f727 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -125,6 +125,9 @@ void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); +extern int __alloc_fd(struct files_struct *files, + unsigned start, unsigned end, unsigned flags); + extern struct kmem_cache *files_cachep; #endif /* __LINUX_FDTABLE_H */ -- cgit v1.2.3 From 7cf4dc3c8dbfdfde163d4636f621cf99a1f63bfb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 19:56:12 -0400 Subject: move files_struct-related bits from kernel/exit.c to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/fdtable.h | 6 --- kernel/exit.c | 93 -------------------------------------------- 3 files changed, 99 insertions(+), 100 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index a3a0705f8f51..0be423cadb26 100644 --- a/fs/file.c +++ b/fs/file.c @@ -84,7 +84,7 @@ static void free_fdtable_work(struct work_struct *work) } } -void free_fdtable_rcu(struct rcu_head *rcu) +static void free_fdtable_rcu(struct rcu_head *rcu) { struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); struct fdtable_defer *fddef; @@ -116,6 +116,11 @@ void free_fdtable_rcu(struct rcu_head *rcu) } } +static inline void free_fdtable(struct fdtable *fdt) +{ + call_rcu(&fdt->rcu, free_fdtable_rcu); +} + /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. @@ -388,6 +393,99 @@ out: return NULL; } +static void close_files(struct files_struct * files) +{ + int i, j; + struct fdtable *fdt; + + j = 0; + + /* + * It is safe to dereference the fd table without RCU or + * ->file_lock because this is the last reference to the + * files structure. But use RCU to shut RCU-lockdep up. + */ + rcu_read_lock(); + fdt = files_fdtable(files); + rcu_read_unlock(); + for (;;) { + unsigned long set; + i = j * BITS_PER_LONG; + if (i >= fdt->max_fds) + break; + set = fdt->open_fds[j++]; + while (set) { + if (set & 1) { + struct file * file = xchg(&fdt->fd[i], NULL); + if (file) { + filp_close(file, files); + cond_resched(); + } + } + i++; + set >>= 1; + } + } +} + +struct files_struct *get_files_struct(struct task_struct *task) +{ + struct files_struct *files; + + task_lock(task); + files = task->files; + if (files) + atomic_inc(&files->count); + task_unlock(task); + + return files; +} + +void put_files_struct(struct files_struct *files) +{ + struct fdtable *fdt; + + if (atomic_dec_and_test(&files->count)) { + close_files(files); + /* + * Free the fd and fdset arrays if we expanded them. + * If the fdtable was embedded, pass files for freeing + * at the end of the RCU grace period. Otherwise, + * you can free files immediately. + */ + rcu_read_lock(); + fdt = files_fdtable(files); + if (fdt != &files->fdtab) + kmem_cache_free(files_cachep, files); + free_fdtable(fdt); + rcu_read_unlock(); + } +} + +void reset_files_struct(struct files_struct *files) +{ + struct task_struct *tsk = current; + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} + +void exit_files(struct task_struct *tsk) +{ + struct files_struct * files = tsk->files; + + if (files) { + task_lock(tsk); + tsk->files = NULL; + task_unlock(tsk); + put_files_struct(files); + } +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index b84ca064f727..3855f4febe70 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -94,14 +94,8 @@ struct vfsmount; struct dentry; extern int expand_files(struct files_struct *, int nr); -extern void free_fdtable_rcu(struct rcu_head *rcu); extern void __init files_defer_init(void); -static inline void free_fdtable(struct fdtable *fdt) -{ - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { struct file * file = NULL; diff --git a/kernel/exit.c b/kernel/exit.c index f65345f9e5bb..20dfc7617c2e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -466,99 +466,6 @@ void daemonize(const char *name, ...) EXPORT_SYMBOL(daemonize); -static void close_files(struct files_struct * files) -{ - int i, j; - struct fdtable *fdt; - - j = 0; - - /* - * It is safe to dereference the fd table without RCU or - * ->file_lock because this is the last reference to the - * files structure. But use RCU to shut RCU-lockdep up. - */ - rcu_read_lock(); - fdt = files_fdtable(files); - rcu_read_unlock(); - for (;;) { - unsigned long set; - i = j * BITS_PER_LONG; - if (i >= fdt->max_fds) - break; - set = fdt->open_fds[j++]; - while (set) { - if (set & 1) { - struct file * file = xchg(&fdt->fd[i], NULL); - if (file) { - filp_close(file, files); - cond_resched(); - } - } - i++; - set >>= 1; - } - } -} - -struct files_struct *get_files_struct(struct task_struct *task) -{ - struct files_struct *files; - - task_lock(task); - files = task->files; - if (files) - atomic_inc(&files->count); - task_unlock(task); - - return files; -} - -void put_files_struct(struct files_struct *files) -{ - struct fdtable *fdt; - - if (atomic_dec_and_test(&files->count)) { - close_files(files); - /* - * Free the fd and fdset arrays if we expanded them. - * If the fdtable was embedded, pass files for freeing - * at the end of the RCU grace period. Otherwise, - * you can free files immediately. - */ - rcu_read_lock(); - fdt = files_fdtable(files); - if (fdt != &files->fdtab) - kmem_cache_free(files_cachep, files); - free_fdtable(fdt); - rcu_read_unlock(); - } -} - -void reset_files_struct(struct files_struct *files) -{ - struct task_struct *tsk = current; - struct files_struct *old; - - old = tsk->files; - task_lock(tsk); - tsk->files = files; - task_unlock(tsk); - put_files_struct(old); -} - -void exit_files(struct task_struct *tsk) -{ - struct files_struct * files = tsk->files; - - if (files) { - task_lock(tsk); - tsk->files = NULL; - task_unlock(tsk); - put_files_struct(files); - } -} - #ifdef CONFIG_MM_OWNER /* * A task is exiting. If it owned this mm, find a new owner for the mm. -- cgit v1.2.3 From b9e02af0ae0783894abb576fbab45ec29aa8e7fc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 20:00:58 -0400 Subject: don't bother with call_rcu() in put_files_struct() At that point nobody can see us anyway; everything that looks at files_fdtable(files) is separated from the guts of put_files_struct(files) - either since files is current->files or because we fetched it under task_lock() and hadn't dropped that yet, or because we'd bumped files->count while holding task_lock()... Signed-off-by: Al Viro --- fs/file.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 0be423cadb26..533fa5d56a5f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -447,18 +447,14 @@ void put_files_struct(struct files_struct *files) if (atomic_dec_and_test(&files->count)) { close_files(files); - /* - * Free the fd and fdset arrays if we expanded them. - * If the fdtable was embedded, pass files for freeing - * at the end of the RCU grace period. Otherwise, - * you can free files immediately. - */ + /* not really needed, since nobody can see us */ rcu_read_lock(); fdt = files_fdtable(files); - if (fdt != &files->fdtab) - kmem_cache_free(files_cachep, files); - free_fdtable(fdt); rcu_read_unlock(); + /* free the arrays if they are not embedded */ + if (fdt != &files->fdtab) + __free_fdtable(fdt); + kmem_cache_free(files_cachep, files); } } -- cgit v1.2.3 From 1983e781da2f7f77906f4ccc2c3dc279cd61d1ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 20:06:36 -0400 Subject: trim free_fdtable_rcu() embedded case isn't hit anymore Signed-off-by: Al Viro --- fs/file.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 533fa5d56a5f..4ce4a0fcf320 100644 --- a/fs/file.c +++ b/fs/file.c @@ -90,16 +90,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu) struct fdtable_defer *fddef; BUG_ON(!fdt); + BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); - if (fdt->max_fds <= NR_OPEN_DEFAULT) { - /* - * This fdtable is embedded in the files structure and that - * structure itself is getting destroyed. - */ - kmem_cache_free(files_cachep, - container_of(fdt, struct files_struct, fdtab)); - return; - } if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { kfree(fdt->fd); kfree(fdt->open_fds); @@ -116,11 +108,6 @@ static void free_fdtable_rcu(struct rcu_head *rcu) } } -static inline void free_fdtable(struct fdtable *fdt) -{ - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. @@ -234,7 +221,7 @@ static int expand_fdtable(struct files_struct *files, int nr) copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt->max_fds > NR_OPEN_DEFAULT) - free_fdtable(cur_fdt); + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); } else { /* Somebody else expanded, so undo our attempt */ __free_fdtable(new_fdt); -- cgit v1.2.3 From 56007cae94f349387c088e738c7dcb6bc513063b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:03:26 -0400 Subject: move put_unused_fd() and fd_install() to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/open.c | 44 -------------------------------------------- 2 files changed, 44 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 4ce4a0fcf320..78cf88f2a0e8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -569,3 +569,47 @@ int get_unused_fd_flags(unsigned flags) return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); } EXPORT_SYMBOL(get_unused_fd_flags); + +static void __put_unused_fd(struct files_struct *files, unsigned int fd) +{ + struct fdtable *fdt = files_fdtable(files); + __clear_open_fd(fd, fdt); + if (fd < files->next_fd) + files->next_fd = fd; +} + +void put_unused_fd(unsigned int fd) +{ + struct files_struct *files = current->files; + spin_lock(&files->file_lock); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); +} + +EXPORT_SYMBOL(put_unused_fd); + +/* + * Install a file pointer in the fd array. + * + * The VFS is full of places where we drop the files lock between + * setting the open_fds bitmap and installing the file in the file + * array. At any such point, we are vulnerable to a dup2() race + * installing a file in the array before us. We need to detect this and + * fput() the struct file we are about to overwrite in this case. + * + * It should never happen - if we allow dup2() do it, _really_ bad things + * will follow. + */ + +void fd_install(unsigned int fd, struct file *file) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + BUG_ON(fdt->fd[fd] != NULL); + rcu_assign_pointer(fdt->fd[fd], file); + spin_unlock(&files->file_lock); +} + +EXPORT_SYMBOL(fd_install); diff --git a/fs/open.c b/fs/open.c index e1f2cdb91a4d..c525bd0e65b6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -803,50 +803,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -static void __put_unused_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - __clear_open_fd(fd, fdt); - if (fd < files->next_fd) - files->next_fd = fd; -} - -void put_unused_fd(unsigned int fd) -{ - struct files_struct *files = current->files; - spin_lock(&files->file_lock); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); -} - -EXPORT_SYMBOL(put_unused_fd); - -/* - * Install a file pointer in the fd array. - * - * The VFS is full of places where we drop the files lock between - * setting the open_fds bitmap and installing the file in the file - * array. At any such point, we are vulnerable to a dup2() race - * installing a file in the array before us. We need to detect this and - * fput() the struct file we are about to overwrite in this case. - * - * It should never happen - if we allow dup2() do it, _really_ bad things - * will follow. - */ - -void fd_install(unsigned int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); -} - -EXPORT_SYMBOL(fd_install); - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; -- cgit v1.2.3 From f869e8a7f753e3fd43d6483e796774776f645edb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:06:33 -0400 Subject: expose a low-level variant of fd_install() for binder Similar situation to that of __alloc_fd(); do not use unless you really have to. You should not touch any descriptor table other than your own; it's a sure sign of a really bad API design. As with __alloc_fd(), you *must* use a first-class reference to struct files_struct; something obtained by get_files_struct(some task) (let alone direct task->files) will not do. It must be either current->files, or obtained by get_files_struct(current) by the owner of that sucker and given to you. Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 13 ++----------- fs/file.c | 16 ++++++++++++++-- include/linux/fdtable.h | 2 ++ 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 4946d282a35c..9e1a98a360d4 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -386,17 +386,8 @@ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - struct files_struct *files = proc->files; - struct fdtable *fdt; - - if (files == NULL) - return; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); + if (proc->files) + __fd_install(proc->files, fd, file); } /* diff --git a/fs/file.c b/fs/file.c index 78cf88f2a0e8..0d1bf0515111 100644 --- a/fs/file.c +++ b/fs/file.c @@ -599,11 +599,18 @@ EXPORT_SYMBOL(put_unused_fd); * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. + * + * NOTE: __fd_install() variant is really, really low-level; don't + * use it unless you are forced to by truly lousy API shoved down + * your throat. 'files' *MUST* be either current->files or obtained + * by get_files_struct(current) done by whoever had given it to you, + * or really bad things will happen. Normally you want to use + * fd_install() instead. */ -void fd_install(unsigned int fd, struct file *file) +void __fd_install(struct files_struct *files, unsigned int fd, + struct file *file) { - struct files_struct *files = current->files; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); @@ -612,4 +619,9 @@ void fd_install(unsigned int fd, struct file *file) spin_unlock(&files->file_lock); } +void fd_install(unsigned int fd, struct file *file) +{ + __fd_install(current->files, fd, file); +} + EXPORT_SYMBOL(fd_install); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 3855f4febe70..59d4fc7f10c8 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -121,6 +121,8 @@ struct files_struct *dup_fd(struct files_struct *, int *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); +extern void __fd_install(struct files_struct *files, + unsigned int fd, struct file *file); extern struct kmem_cache *files_cachep; -- cgit v1.2.3 From 0ee8cdfe6af052deb56dccd54838a1eb32fb4ca2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:12:10 -0400 Subject: take fget() and friends to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/file_table.c | 106 -------------------------------------------------------- 2 files changed, 106 insertions(+), 106 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 0d1bf0515111..6eef55ce30c9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -625,3 +625,109 @@ void fd_install(unsigned int fd, struct file *file) } EXPORT_SYMBOL(fd_install); + +struct file *fget(unsigned int fd) +{ + struct file *file; + struct files_struct *files = current->files; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + /* File object ref couldn't be taken */ + if (file->f_mode & FMODE_PATH || + !atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + } + rcu_read_unlock(); + + return file; +} + +EXPORT_SYMBOL(fget); + +struct file *fget_raw(unsigned int fd) +{ + struct file *file; + struct files_struct *files = current->files; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + /* File object ref couldn't be taken */ + if (!atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + } + rcu_read_unlock(); + + return file; +} + +EXPORT_SYMBOL(fget_raw); + +/* + * Lightweight file lookup - no refcnt increment if fd table isn't shared. + * + * You can use this instead of fget if you satisfy all of the following + * conditions: + * 1) You must call fput_light before exiting the syscall and returning control + * to userspace (i.e. you cannot remember the returned struct file * after + * returning to userspace). + * 2) You must not call filp_close on the returned struct file * in between + * calls to fget_light and fput_light. + * 3) You must not clone the current task in between the calls to fget_light + * and fput_light. + * + * The fput_needed flag returned by fget_light should be passed to the + * corresponding fput_light. + */ +struct file *fget_light(unsigned int fd, int *fput_needed) +{ + struct file *file; + struct files_struct *files = current->files; + + *fput_needed = 0; + if (atomic_read(&files->count) == 1) { + file = fcheck_files(files, fd); + if (file && (file->f_mode & FMODE_PATH)) + file = NULL; + } else { + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + if (!(file->f_mode & FMODE_PATH) && + atomic_long_inc_not_zero(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; + } + rcu_read_unlock(); + } + + return file; +} + +struct file *fget_raw_light(unsigned int fd, int *fput_needed) +{ + struct file *file; + struct files_struct *files = current->files; + + *fput_needed = 0; + if (atomic_read(&files->count) == 1) { + file = fcheck_files(files, fd); + } else { + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + if (atomic_long_inc_not_zero(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; + } + rcu_read_unlock(); + } + + return file; +} diff --git a/fs/file_table.c b/fs/file_table.c index 701985e4ccda..c6780163bf3e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -339,112 +339,6 @@ void __fput_sync(struct file *file) EXPORT_SYMBOL(fput); -struct file *fget(unsigned int fd) -{ - struct file *file; - struct files_struct *files = current->files; - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken */ - if (file->f_mode & FMODE_PATH || - !atomic_long_inc_not_zero(&file->f_count)) - file = NULL; - } - rcu_read_unlock(); - - return file; -} - -EXPORT_SYMBOL(fget); - -struct file *fget_raw(unsigned int fd) -{ - struct file *file; - struct files_struct *files = current->files; - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken */ - if (!atomic_long_inc_not_zero(&file->f_count)) - file = NULL; - } - rcu_read_unlock(); - - return file; -} - -EXPORT_SYMBOL(fget_raw); - -/* - * Lightweight file lookup - no refcnt increment if fd table isn't shared. - * - * You can use this instead of fget if you satisfy all of the following - * conditions: - * 1) You must call fput_light before exiting the syscall and returning control - * to userspace (i.e. you cannot remember the returned struct file * after - * returning to userspace). - * 2) You must not call filp_close on the returned struct file * in between - * calls to fget_light and fput_light. - * 3) You must not clone the current task in between the calls to fget_light - * and fput_light. - * - * The fput_needed flag returned by fget_light should be passed to the - * corresponding fput_light. - */ -struct file *fget_light(unsigned int fd, int *fput_needed) -{ - struct file *file; - struct files_struct *files = current->files; - - *fput_needed = 0; - if (atomic_read(&files->count) == 1) { - file = fcheck_files(files, fd); - if (file && (file->f_mode & FMODE_PATH)) - file = NULL; - } else { - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - if (!(file->f_mode & FMODE_PATH) && - atomic_long_inc_not_zero(&file->f_count)) - *fput_needed = 1; - else - /* Didn't get the reference, someone's freed */ - file = NULL; - } - rcu_read_unlock(); - } - - return file; -} - -struct file *fget_raw_light(unsigned int fd, int *fput_needed) -{ - struct file *file; - struct files_struct *files = current->files; - - *fput_needed = 0; - if (atomic_read(&files->count) == 1) { - file = fcheck_files(files, fd); - } else { - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - if (atomic_long_inc_not_zero(&file->f_count)) - *fput_needed = 1; - else - /* Didn't get the reference, someone's freed */ - file = NULL; - } - rcu_read_unlock(); - } - - return file; -} - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { -- cgit v1.2.3 From 483ce1d4b8c3b82bc9c9a1dd9dbc44f50b3aaf5a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:04:24 -0400 Subject: take descriptor-related part of close() to file.c Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 34 ++-------------------------------- fs/file.c | 26 ++++++++++++++++++++++++++ fs/open.c | 22 +--------------------- include/linux/fdtable.h | 2 ++ 4 files changed, 31 insertions(+), 53 deletions(-) (limited to 'fs') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 9e1a98a360d4..f71d624995ea 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -390,43 +390,17 @@ static void task_fd_install( __fd_install(proc->files, fd, file); } -/* - * copied from __put_unused_fd in open.c - */ -static void __put_unused_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - __clear_open_fd(fd, fdt); - if (fd < files->next_fd) - files->next_fd = fd; -} - /* * copied from sys_close */ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { - struct file *filp; - struct files_struct *files = proc->files; - struct fdtable *fdt; int retval; - if (files == NULL) + if (proc->files == NULL) return -ESRCH; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - filp = fdt->fd[fd]; - if (!filp) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); - retval = filp_close(filp, files); - + retval = __close_fd(proc->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || retval == -ERESTARTNOINTR || @@ -435,10 +409,6 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) retval = -EINTR; return retval; - -out_unlock: - spin_unlock(&files->file_lock); - return -EBADF; } static void binder_set_nice(long nice) diff --git a/fs/file.c b/fs/file.c index 6eef55ce30c9..fd4694e688ab 100644 --- a/fs/file.c +++ b/fs/file.c @@ -626,6 +626,32 @@ void fd_install(unsigned int fd, struct file *file) EXPORT_SYMBOL(fd_install); +/* + * The same warnings as for __alloc_fd()/__fd_install() apply here... + */ +int __close_fd(struct files_struct *files, unsigned fd) +{ + struct file *file; + struct fdtable *fdt; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) + goto out_unlock; + file = fdt->fd[fd]; + if (!file) + goto out_unlock; + rcu_assign_pointer(fdt->fd[fd], NULL); + __clear_close_on_exec(fd, fdt); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); + return filp_close(file, files); + +out_unlock: + spin_unlock(&files->file_lock); + return -EBADF; +} + struct file *fget(unsigned int fd) { struct file *file; diff --git a/fs/open.c b/fs/open.c index c525bd0e65b6..30760017deed 100644 --- a/fs/open.c +++ b/fs/open.c @@ -994,23 +994,7 @@ EXPORT_SYMBOL(filp_close); */ SYSCALL_DEFINE1(close, unsigned int, fd) { - struct file * filp; - struct files_struct *files = current->files; - struct fdtable *fdt; - int retval; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - filp = fdt->fd[fd]; - if (!filp) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); - retval = filp_close(filp, files); + int retval = __close_fd(current->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -1020,10 +1004,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) retval = -EINTR; return retval; - -out_unlock: - spin_unlock(&files->file_lock); - return -EBADF; } EXPORT_SYMBOL(sys_close); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 59d4fc7f10c8..59488f2392bc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -123,6 +123,8 @@ extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); extern void __fd_install(struct files_struct *files, unsigned int fd, struct file *file); +extern int __close_fd(struct files_struct *files, + unsigned int fd); extern struct kmem_cache *files_cachep; -- cgit v1.2.3 From 63fedaf1c2dd546da22ff5b4ae06e9b4efba5146 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 26 Sep 2012 21:09:06 -0400 Subject: ext4: remove unused function ext4_ext_check_cache Remove unused function ext4_ext_check_cache() and merge the code back to the ext4_ext_in_cache(). Reviewed-by: Carlos Maiolino Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 48 +++++++++--------------------------------------- 1 file changed, 9 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a510917c175a..cbcc6b3f2ae0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2136,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } /* - * ext4_ext_check_cache() + * ext4_ext_in_cache() * Checks to see if the given block is in the cache. * If it is, the cached extent is stored in the given - * cache extent pointer. If the cached extent is a hole, - * this routine should be used instead of - * ext4_ext_in_cache if the calling function needs to - * know the size of the hole. + * cache extent pointer. * * @inode: The files inode * @block: The block to look for in the cache @@ -2151,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, * * Return 0 if cache is invalid; 1 if the cache is valid */ -static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, - struct ext4_ext_cache *ex){ +static int +ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, + struct ext4_extent *ex) +{ struct ext4_ext_cache *cex; struct ext4_sb_info *sbi; int ret = 0; @@ -2169,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, goto errout; if (in_range(block, cex->ec_block, cex->ec_len)) { - memcpy(ex, cex, sizeof(struct ext4_ext_cache)); + ex->ee_block = cpu_to_le32(cex->ec_block); + ext4_ext_store_pblock(ex, cex->ec_start); + ex->ee_len = cpu_to_le16(cex->ec_len); ext_debug("%u cached by %u:%u:%llu\n", block, cex->ec_block, cex->ec_len, cex->ec_start); @@ -2181,37 +2182,6 @@ errout: return ret; } -/* - * ext4_ext_in_cache() - * Checks to see if the given block is in the cache. - * If it is, the cached extent is stored in the given - * extent pointer. - * - * @inode: The files inode - * @block: The block to look for in the cache - * @ex: Pointer where the cached extent will be stored - * if it contains block - * - * Return 0 if cache is invalid; 1 if the cache is valid - */ -static int -ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, - struct ext4_extent *ex) -{ - struct ext4_ext_cache cex; - int ret = 0; - - if (ext4_ext_check_cache(inode, block, &cex)) { - ex->ee_block = cpu_to_le32(cex.ec_block); - ext4_ext_store_pblock(ex, cex.ec_start); - ex->ee_len = cpu_to_le16(cex.ec_len); - ret = 1; - } - - return ret; -} - - /* * ext4_ext_rm_idx: * removes index from the index block. -- cgit v1.2.3 From 6a6d27de340c89c5323565b49f7851362619925d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 09:56:33 -0400 Subject: take close-on-exec logics to fs/file.c, clean it up a bit ... and add cond_resched() there, while we are at it. We can get large latencies as is... Signed-off-by: Al Viro --- fs/exec.c | 41 ++++++----------------------------------- fs/file.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/fdtable.h | 1 + 3 files changed, 44 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 574cf4de4ec3..f2b6af585d4a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1006,40 +1006,6 @@ no_thread_group: return 0; } -/* - * These functions flushes out all traces of the currently running executable - * so that a new one can be started - */ -static void flush_old_files(struct files_struct * files) -{ - long j = -1; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - for (;;) { - unsigned long set, i; - - j++; - i = j * BITS_PER_LONG; - fdt = files_fdtable(files); - if (i >= fdt->max_fds) - break; - set = fdt->close_on_exec[j]; - if (!set) - continue; - fdt->close_on_exec[j] = 0; - spin_unlock(&files->file_lock); - for ( ; set ; i++,set >>= 1) { - if (set & 1) { - sys_close(i); - } - } - spin_lock(&files->file_lock); - - } - spin_unlock(&files->file_lock); -} - char *get_task_comm(char *buf, struct task_struct *tsk) { /* buf must be at least sizeof(tsk->comm) in size */ @@ -1050,6 +1016,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk) } EXPORT_SYMBOL_GPL(get_task_comm); +/* + * These functions flushes out all traces of the currently running executable + * so that a new one can be started + */ + void set_task_comm(struct task_struct *tsk, char *buf) { task_lock(tsk); @@ -1171,7 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->self_exec_id++; flush_signal_handlers(current, 0); - flush_old_files(current->files); + do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); diff --git a/fs/file.c b/fs/file.c index fd4694e688ab..92197dd9fdc8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -652,6 +652,43 @@ out_unlock: return -EBADF; } +void do_close_on_exec(struct files_struct *files) +{ + unsigned i; + struct fdtable *fdt; + + /* exec unshares first */ + BUG_ON(atomic_read(&files->count) != 1); + spin_lock(&files->file_lock); + for (i = 0; ; i++) { + unsigned long set; + unsigned fd = i * BITS_PER_LONG; + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) + break; + set = fdt->close_on_exec[i]; + if (!set) + continue; + fdt->close_on_exec[i] = 0; + for ( ; set ; fd++, set >>= 1) { + struct file *file; + if (!(set & 1)) + continue; + file = fdt->fd[fd]; + if (!file) + continue; + rcu_assign_pointer(fdt->fd[fd], NULL); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); + filp_close(file, files); + cond_resched(); + spin_lock(&files->file_lock); + } + + } + spin_unlock(&files->file_lock); +} + struct file *fget(unsigned int fd) { struct file *file; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 59488f2392bc..ef4b2137e6bc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -118,6 +118,7 @@ void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); +void do_close_on_exec(struct files_struct *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); -- cgit v1.2.3 From fe17f22d7fd0e344ef6447238f799bb49f670c6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 11:48:11 -0400 Subject: take purely descriptor-related stuff from fcntl.c to file.c Signed-off-by: Al Viro --- fs/fcntl.c | 131 ++------------------------------------------------ fs/file.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/file.h | 2 + 3 files changed, 137 insertions(+), 128 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 08e6af5c1b1f..40a5bfb72cca 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -26,127 +26,6 @@ #include #include -void set_close_on_exec(unsigned int fd, int flag) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (flag) - __set_close_on_exec(fd, fdt); - else - __clear_close_on_exec(fd, fdt); - spin_unlock(&files->file_lock); -} - -static bool get_close_on_exec(unsigned int fd) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - bool res; - rcu_read_lock(); - fdt = files_fdtable(files); - res = close_on_exec(fd, fdt); - rcu_read_unlock(); - return res; -} - -SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) -{ - int err = -EBADF; - struct file * file, *tofree; - struct files_struct * files = current->files; - struct fdtable *fdt; - - if ((flags & ~O_CLOEXEC) != 0) - return -EINVAL; - - if (unlikely(oldfd == newfd)) - return -EINVAL; - - if (newfd >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; - - spin_lock(&files->file_lock); - err = expand_files(files, newfd); - file = fcheck(oldfd); - if (unlikely(!file)) - goto Ebadf; - if (unlikely(err < 0)) { - if (err == -EMFILE) - goto Ebadf; - goto out_unlock; - } - /* - * We need to detect attempts to do dup2() over allocated but still - * not finished descriptor. NB: OpenBSD avoids that at the price of - * extra work in their equivalent of fget() - they insert struct - * file immediately after grabbing descriptor, mark it larval if - * more work (e.g. actual opening) is needed and make sure that - * fget() treats larval files as absent. Potentially interesting, - * but while extra work in fget() is trivial, locking implications - * and amount of surgery on open()-related paths in VFS are not. - * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" - * deadlocks in rather amusing ways, AFAICS. All of that is out of - * scope of POSIX or SUS, since neither considers shared descriptor - * tables and this condition does not arise without those. - */ - err = -EBUSY; - fdt = files_fdtable(files); - tofree = fdt->fd[newfd]; - if (!tofree && fd_is_open(newfd, fdt)) - goto out_unlock; - get_file(file); - rcu_assign_pointer(fdt->fd[newfd], file); - __set_open_fd(newfd, fdt); - if (flags & O_CLOEXEC) - __set_close_on_exec(newfd, fdt); - else - __clear_close_on_exec(newfd, fdt); - spin_unlock(&files->file_lock); - - if (tofree) - filp_close(tofree, files); - - return newfd; - -Ebadf: - err = -EBADF; -out_unlock: - spin_unlock(&files->file_lock); - return err; -} - -SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) -{ - if (unlikely(newfd == oldfd)) { /* corner case */ - struct files_struct *files = current->files; - int retval = oldfd; - - rcu_read_lock(); - if (!fcheck_files(files, oldfd)) - retval = -EBADF; - rcu_read_unlock(); - return retval; - } - return sys_dup3(oldfd, newfd, 0); -} - -SYSCALL_DEFINE1(dup, unsigned int, fildes) -{ - int ret = -EBADF; - struct file *file = fget_raw(fildes); - - if (file) { - ret = get_unused_fd(); - if (ret >= 0) - fd_install(ret, file); - else - fput(file); - } - return ret; -} - #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) @@ -376,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, switch (cmd) { case F_DUPFD: + err = f_dupfd(arg, filp, 0); + break; case F_DUPFD_CLOEXEC: - if (arg >= rlimit(RLIMIT_NOFILE)) - break; - err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); - if (err >= 0) { - get_file(filp); - fd_install(err, filp); - } + err = f_dupfd(arg, filp, FD_CLOEXEC); break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; diff --git a/fs/file.c b/fs/file.c index 92197dd9fdc8..7f29544755d0 100644 --- a/fs/file.c +++ b/fs/file.c @@ -6,6 +6,7 @@ * Manage the dynamic fd arrays in the process files_struct. */ +#include #include #include #include @@ -794,3 +795,134 @@ struct file *fget_raw_light(unsigned int fd, int *fput_needed) return file; } + +void set_close_on_exec(unsigned int fd, int flag) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (flag) + __set_close_on_exec(fd, fdt); + else + __clear_close_on_exec(fd, fdt); + spin_unlock(&files->file_lock); +} + +bool get_close_on_exec(unsigned int fd) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + bool res; + rcu_read_lock(); + fdt = files_fdtable(files); + res = close_on_exec(fd, fdt); + rcu_read_unlock(); + return res; +} + +SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +{ + int err = -EBADF; + struct file * file, *tofree; + struct files_struct * files = current->files; + struct fdtable *fdt; + + if ((flags & ~O_CLOEXEC) != 0) + return -EINVAL; + + if (newfd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + + spin_lock(&files->file_lock); + err = expand_files(files, newfd); + file = fcheck(oldfd); + if (unlikely(!file)) + goto Ebadf; + if (unlikely(err < 0)) { + if (err == -EMFILE) + goto Ebadf; + goto out_unlock; + } + /* + * We need to detect attempts to do dup2() over allocated but still + * not finished descriptor. NB: OpenBSD avoids that at the price of + * extra work in their equivalent of fget() - they insert struct + * file immediately after grabbing descriptor, mark it larval if + * more work (e.g. actual opening) is needed and make sure that + * fget() treats larval files as absent. Potentially interesting, + * but while extra work in fget() is trivial, locking implications + * and amount of surgery on open()-related paths in VFS are not. + * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" + * deadlocks in rather amusing ways, AFAICS. All of that is out of + * scope of POSIX or SUS, since neither considers shared descriptor + * tables and this condition does not arise without those. + */ + err = -EBUSY; + fdt = files_fdtable(files); + tofree = fdt->fd[newfd]; + if (!tofree && fd_is_open(newfd, fdt)) + goto out_unlock; + get_file(file); + rcu_assign_pointer(fdt->fd[newfd], file); + __set_open_fd(newfd, fdt); + if (flags & O_CLOEXEC) + __set_close_on_exec(newfd, fdt); + else + __clear_close_on_exec(newfd, fdt); + spin_unlock(&files->file_lock); + + if (tofree) + filp_close(tofree, files); + + return newfd; + +Ebadf: + err = -EBADF; +out_unlock: + spin_unlock(&files->file_lock); + return err; +} + +SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) +{ + if (unlikely(newfd == oldfd)) { /* corner case */ + struct files_struct *files = current->files; + int retval = oldfd; + + rcu_read_lock(); + if (!fcheck_files(files, oldfd)) + retval = -EBADF; + rcu_read_unlock(); + return retval; + } + return sys_dup3(oldfd, newfd, 0); +} + +SYSCALL_DEFINE1(dup, unsigned int, fildes) +{ + int ret = -EBADF; + struct file *file = fget_raw(fildes); + + if (file) { + ret = get_unused_fd(); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + } + return ret; +} + +int f_dupfd(unsigned int from, struct file *file, unsigned flags) +{ + int err; + if (from >= rlimit(RLIMIT_NOFILE)) + return -EINVAL; + err = alloc_fd(from, flags); + if (err >= 0) { + get_file(file); + fd_install(err, file); + } + return err; +} diff --git a/include/linux/file.h b/include/linux/file.h index 86795ec6027d..da84fa0f4579 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -30,7 +30,9 @@ extern struct file *fget(unsigned int fd); extern struct file *fget_light(unsigned int fd, int *fput_needed); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); +extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); +extern bool get_close_on_exec(unsigned int fd); extern void put_filp(struct file *); extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd_flags(unsigned flags); -- cgit v1.2.3 From 8280d16172243702ed43432f826ca6130edb4086 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 12:11:46 -0400 Subject: new helper: replace_fd() analog of dup2(), except that it takes struct file * as source. Signed-off-by: Al Viro --- fs/exec.c | 11 +------ fs/file.c | 91 +++++++++++++++++++++++++++++++++++----------------- include/linux/file.h | 1 + 3 files changed, 64 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index f2b6af585d4a..3fc74681cc6c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2041,23 +2041,14 @@ static void wait_for_dump_helpers(struct file *file) static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; - struct fdtable *fdt; struct coredump_params *cp = (struct coredump_params *)info->data; - struct files_struct *cf = current->files; int err = create_pipe_files(files, 0); if (err) return err; cp->file = files[1]; - sys_close(0); - fd_install(0, files[0]); - spin_lock(&cf->file_lock); - fdt = files_fdtable(cf); - __set_open_fd(0, fdt); - __clear_close_on_exec(0, fdt); - spin_unlock(&cf->file_lock); - + replace_fd(0, files[0], 0); /* and disallow core files too */ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; diff --git a/fs/file.c b/fs/file.c index 7f29544755d0..a7bbe0324478 100644 --- a/fs/file.c +++ b/fs/file.c @@ -821,29 +821,12 @@ bool get_close_on_exec(unsigned int fd) return res; } -SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +static int do_dup2(struct files_struct *files, + struct file *file, unsigned fd, unsigned flags) { - int err = -EBADF; - struct file * file, *tofree; - struct files_struct * files = current->files; + struct file *tofree; struct fdtable *fdt; - if ((flags & ~O_CLOEXEC) != 0) - return -EINVAL; - - if (newfd >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; - - spin_lock(&files->file_lock); - err = expand_files(files, newfd); - file = fcheck(oldfd); - if (unlikely(!file)) - goto Ebadf; - if (unlikely(err < 0)) { - if (err == -EMFILE) - goto Ebadf; - goto out_unlock; - } /* * We need to detect attempts to do dup2() over allocated but still * not finished descriptor. NB: OpenBSD avoids that at the price of @@ -858,24 +841,74 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) * scope of POSIX or SUS, since neither considers shared descriptor * tables and this condition does not arise without those. */ - err = -EBUSY; fdt = files_fdtable(files); - tofree = fdt->fd[newfd]; - if (!tofree && fd_is_open(newfd, fdt)) - goto out_unlock; + tofree = fdt->fd[fd]; + if (!tofree && fd_is_open(fd, fdt)) + goto Ebusy; get_file(file); - rcu_assign_pointer(fdt->fd[newfd], file); - __set_open_fd(newfd, fdt); + rcu_assign_pointer(fdt->fd[fd], file); + __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) - __set_close_on_exec(newfd, fdt); + __set_close_on_exec(fd, fdt); else - __clear_close_on_exec(newfd, fdt); + __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); if (tofree) filp_close(tofree, files); - return newfd; + return fd; + +Ebusy: + spin_unlock(&files->file_lock); + return -EBUSY; +} + +int replace_fd(unsigned fd, struct file *file, unsigned flags) +{ + int err; + struct files_struct *files = current->files; + + if (!file) + return __close_fd(files, fd); + + if (fd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + + spin_lock(&files->file_lock); + err = expand_files(files, fd); + if (unlikely(err < 0)) + goto out_unlock; + return do_dup2(files, file, fd, flags); + +out_unlock: + spin_unlock(&files->file_lock); + return err; +} + +SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +{ + int err = -EBADF; + struct file *file; + struct files_struct *files = current->files; + + if ((flags & ~O_CLOEXEC) != 0) + return -EINVAL; + + if (newfd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + + spin_lock(&files->file_lock); + err = expand_files(files, newfd); + file = fcheck(oldfd); + if (unlikely(!file)) + goto Ebadf; + if (unlikely(err < 0)) { + if (err == -EMFILE) + goto Ebadf; + goto out_unlock; + } + return do_dup2(files, file, newfd, flags); Ebadf: err = -EBADF; diff --git a/include/linux/file.h b/include/linux/file.h index da84fa0f4579..6239591a6122 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -31,6 +31,7 @@ extern struct file *fget_light(unsigned int fd, int *fput_needed); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); +extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); extern void put_filp(struct file *); -- cgit v1.2.3 From b8318b01a8f7f760ae3ecae052ccc7fc123d9508 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 20:09:42 -0400 Subject: take __{set,clear}_{open_fd,close_on_exec}() into fs/file.c nobody uses those outside anymore. Signed-off-by: Al Viro --- fs/file.c | 20 ++++++++++++++++++++ include/linux/fdtable.h | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index a7bbe0324478..40ddef9fe041 100644 --- a/fs/file.c +++ b/fs/file.c @@ -256,6 +256,26 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static inline void __set_close_on_exec(int fd, struct fdtable *fdt) +{ + __set_bit(fd, fdt->close_on_exec); +} + +static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) +{ + __clear_bit(fd, fdt->close_on_exec); +} + +static inline void __set_open_fd(int fd, struct fdtable *fdt) +{ + __set_bit(fd, fdt->open_fds); +} + +static inline void __clear_open_fd(int fd, struct fdtable *fdt) +{ + __clear_bit(fd, fdt->open_fds); +} + static int count_open_files(struct fdtable *fdt) { int size = fdt->max_fds; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index ef4b2137e6bc..9ff26319d44f 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -30,31 +30,11 @@ struct fdtable { struct fdtable *next; }; -static inline void __set_close_on_exec(int fd, struct fdtable *fdt) -{ - __set_bit(fd, fdt->close_on_exec); -} - -static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) -{ - __clear_bit(fd, fdt->close_on_exec); -} - static inline bool close_on_exec(int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->close_on_exec); } -static inline void __set_open_fd(int fd, struct fdtable *fdt) -{ - __set_bit(fd, fdt->open_fds); -} - -static inline void __clear_open_fd(int fd, struct fdtable *fdt) -{ - __clear_bit(fd, fdt->open_fds); -} - static inline bool fd_is_open(int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->open_fds); -- cgit v1.2.3 From ad47bd7252bf402fe7dba92f5240b5ed16832ae7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 20:11:34 -0400 Subject: make expand_files() and alloc_fd() static no callers outside of fs/file.c left Signed-off-by: Al Viro --- fs/file.c | 4 ++-- include/linux/fdtable.h | 1 - include/linux/file.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 40ddef9fe041..967bd0dadbe5 100644 --- a/fs/file.c +++ b/fs/file.c @@ -238,7 +238,7 @@ static int expand_fdtable(struct files_struct *files, int nr) * expanded and execution may have blocked. * The files->file_lock should be held on entry, and will be held on exit. */ -int expand_files(struct files_struct *files, int nr) +static int expand_files(struct files_struct *files, int nr) { struct fdtable *fdt; @@ -580,7 +580,7 @@ out: return error; } -int alloc_fd(unsigned start, unsigned flags) +static int alloc_fd(unsigned start, unsigned flags) { return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 9ff26319d44f..de2b71caa0f0 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -73,7 +73,6 @@ struct file_operations; struct vfsmount; struct dentry; -extern int expand_files(struct files_struct *, int nr); extern void __init files_defer_init(void); static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) diff --git a/include/linux/file.h b/include/linux/file.h index 6239591a6122..6eee54aea279 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -35,7 +35,6 @@ extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); extern void put_filp(struct file *); -extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd_flags(unsigned flags); #define get_unused_fd() get_unused_fd_flags(0) extern void put_unused_fd(unsigned int fd); -- cgit v1.2.3 From c3c073f808b22dfae15ef8412b6f7b998644139a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 22:32:06 -0400 Subject: new helper: iterate_fd() iterates through the opened files in given descriptor table, calling a supplied function; we stop once non-zero is returned. Callback gets struct file *, descriptor number and const void * argument passed to iterator. It is called with files->file_lock held, so it is not allowed to block. tty_io, netprio_cgroup and selinux flush_unauthorized_files() converted to its use. Signed-off-by: Al Viro --- drivers/tty/tty_io.c | 36 +++++++++++------------------- fs/file.c | 21 +++++++++++++++++ include/linux/fdtable.h | 3 +++ net/core/netprio_cgroup.c | 38 ++++++++++--------------------- security/selinux/hooks.c | 57 ++++++++++++++++++----------------------------- 5 files changed, 71 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index b425c79675ad..71d95cfbabec 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2791,6 +2791,13 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, } #endif +static int this_tty(const void *t, struct file *file, unsigned fd) +{ + if (likely(file->f_op->read != tty_read)) + return 0; + return file_tty(file) != t ? 0 : fd + 1; +} + /* * This implements the "Secure Attention Key" --- the idea is to * prevent trojan horses by killing all processes associated with this @@ -2818,8 +2825,6 @@ void __do_SAK(struct tty_struct *tty) struct task_struct *g, *p; struct pid *session; int i; - struct file *filp; - struct fdtable *fdt; if (!tty) return; @@ -2849,27 +2854,12 @@ void __do_SAK(struct tty_struct *tty) continue; } task_lock(p); - if (p->files) { - /* - * We don't take a ref to the file, so we must - * hold ->file_lock instead. - */ - spin_lock(&p->files->file_lock); - fdt = files_fdtable(p->files); - for (i = 0; i < fdt->max_fds; i++) { - filp = fcheck_files(p->files, i); - if (!filp) - continue; - if (filp->f_op->read == tty_read && - file_tty(filp) == tty) { - printk(KERN_NOTICE "SAK: killed process %d" - " (%s): fd#%d opened to the tty\n", - task_pid_nr(p), p->comm, i); - force_sig(SIGKILL, p); - break; - } - } - spin_unlock(&p->files->file_lock); + i = iterate_fd(p->files, 0, this_tty, tty); + if (i != 0) { + printk(KERN_NOTICE "SAK: killed process %d" + " (%s): fd#%d opened to the tty\n", + task_pid_nr(p), p->comm, i - 1); + force_sig(SIGKILL, p); } task_unlock(p); } while_each_thread(g, p); diff --git a/fs/file.c b/fs/file.c index 967bd0dadbe5..e6e418122587 100644 --- a/fs/file.c +++ b/fs/file.c @@ -979,3 +979,24 @@ int f_dupfd(unsigned int from, struct file *file, unsigned flags) } return err; } + +int iterate_fd(struct files_struct *files, unsigned n, + int (*f)(const void *, struct file *, unsigned), + const void *p) +{ + struct fdtable *fdt; + struct file *file; + int res = 0; + if (!files) + return 0; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + while (!res && n < fdt->max_fds) { + file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); + if (file) + res = f(p, file, n); + } + spin_unlock(&files->file_lock); + return res; +} +EXPORT_SYMBOL(iterate_fd); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index de2b71caa0f0..fb7dacae0522 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -98,6 +98,9 @@ void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); void do_close_on_exec(struct files_struct *); +int iterate_fd(struct files_struct *, unsigned, + int (*)(const void *, struct file *, unsigned), + const void *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..5ffd084c6a83 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -272,38 +272,24 @@ out_free_devname: return ret; } +static int update_netprio(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + if (sock) + sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + return 0; +} + void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; + void *v; cgroup_taskset_for_each(p, cgrp, tset) { - unsigned int fd; - struct fdtable *fdt; - struct files_struct *files; - task_lock(p); - files = p->files; - if (!files) { - task_unlock(p); - continue; - } - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - for (fd = 0; fd < fdt->max_fds; fd++) { - struct file *file; - struct socket *sock; - int err; - - file = fcheck_files(files, fd); - if (!file) - continue; - - sock = sock_from_file(file, &err); - if (sock) - sock_update_netprioidx(sock->sk, p); - } - spin_unlock(&files->file_lock); + v = (void *)(unsigned long)task_netprioidx(p); + iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 00b50113642d..4dfbcea10eb7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2088,15 +2088,19 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm) return (atsecure || cap_bprm_secureexec(bprm)); } +static int match_file(const void *p, struct file *file, unsigned fd) +{ + return file_has_perm(p, file, file_to_av(file)) ? fd + 1 : 0; +} + /* Derived from fs/exec.c:flush_old_files. */ static inline void flush_unauthorized_files(const struct cred *cred, struct files_struct *files) { struct file *file, *devnull = NULL; struct tty_struct *tty; - struct fdtable *fdt; - long j = -1; int drop_tty = 0; + unsigned n; tty = get_current_tty(); if (tty) { @@ -2123,41 +2127,24 @@ static inline void flush_unauthorized_files(const struct cred *cred, no_tty(); /* Revalidate access to inherited open files. */ - spin_lock(&files->file_lock); - for (;;) { - unsigned long set, i; - j++; - i = j * BITS_PER_LONG; - fdt = files_fdtable(files); - if (i >= fdt->max_fds) - break; - set = fdt->open_fds[j]; - if (!set) - continue; - spin_unlock(&files->file_lock); - for ( ; set ; i++, set >>= 1) { - if (!(set & 1)) - continue; - file = fget(i); - if (!file) - continue; - if (file_has_perm(cred, file, file_to_av(file))) { - if (devnull) { - get_file(devnull); - } else { - devnull = dentry_open(&selinux_null, - O_RDWR, cred); - if (IS_ERR(devnull)) - devnull = NULL; - } - replace_fd(i, devnull, 0); - } - fput(file); - } - spin_lock(&files->file_lock); + n = iterate_fd(files, 0, match_file, cred); + if (!n) /* none found? */ + return; + devnull = dentry_open(&selinux_null, O_RDWR, cred); + if (!IS_ERR(devnull)) { + /* replace all the matching ones with this */ + do { + get_file(devnull); + replace_fd(n - 1, devnull, 0); + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); + fput(devnull); + } else { + /* just close all the matching ones */ + do { + replace_fd(n - 1, NULL, 0); + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); } - spin_unlock(&files->file_lock); } /* -- cgit v1.2.3 From 179e037fc1370288188cb1f90b81156d75a3cb2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 22:43:47 -0400 Subject: do_coredump(): make sure that descriptor table isn't shared Signed-off-by: Al Viro --- fs/exec.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 3fc74681cc6c..beb05a95e4a3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2066,6 +2066,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) int retval = 0; int flag = 0; int ispipe; + struct files_struct *displaced; bool need_nonrelative = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { @@ -2219,6 +2220,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) goto close_fail; } + /* get us an unshared descriptor table; almost always a no-op */ + retval = unshare_files(&displaced); + if (retval) + goto close_fail; + if (displaced) + put_files_struct(displaced); retval = binfmt->core_dump(&cprm); if (retval) current->signal->group_exit_code |= 0x80; -- cgit v1.2.3 From 864bdb3b6cbd9911222543fef1cfe36f88183f44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 22 Aug 2012 18:42:10 -0400 Subject: new helper: daemonize_descriptors() descriptor-related parts of daemonize, done right. As the result we simplify the locking rules for ->files - we hold task_lock in *all* cases when we modify ->files. Signed-off-by: Al Viro --- fs/file.c | 6 ++++++ include/linux/fdtable.h | 1 + kernel/exit.c | 4 +--- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index e6e418122587..15750b80e3ce 100644 --- a/fs/file.c +++ b/fs/file.c @@ -519,6 +519,12 @@ struct files_struct init_files = { .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; +void daemonize_descriptors(void) +{ + atomic_inc(&init_files.count); + reset_files_struct(&init_files); +} + /* * allocate a file descriptor, mark it busy. */ diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fb7dacae0522..45052aa814c8 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -95,6 +95,7 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); +void daemonize_descriptors(void); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); void do_close_on_exec(struct files_struct *); diff --git a/kernel/exit.c b/kernel/exit.c index 20dfc7617c2e..095113321318 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -457,9 +457,7 @@ void daemonize(const char *name, ...) /* Become as one with the init task */ daemonize_fs_struct(); - exit_files(current); - current->files = init_task.files; - atomic_inc(¤t->files->count); + daemonize_descriptors(); reparent_to_kthreadd(); } -- cgit v1.2.3 From faf60af17f8da87e1c87a6be527344791025ce9e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 23 Aug 2012 14:43:24 +0400 Subject: procfs: Move /proc/pid/fd[info] handling code to fd.[ch] This patch prepares the ground for further extension of /proc/pid/fd[info] handling code by moving fdinfo handling code into fs/proc/fd.c. I think such move makes both fs/proc/base.c and fs/proc/fd.c easier to read. Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov CC: Al Viro CC: Alexey Dobriyan CC: Andrew Morton CC: James Bottomley CC: "Aneesh Kumar K.V" CC: Alexey Dobriyan CC: Matthew Helsley CC: "J. Bruce Fields" CC: "Aneesh Kumar K.V" Signed-off-by: Al Viro --- fs/proc/Makefile | 2 +- fs/proc/base.c | 388 +---------------------------------------------------- fs/proc/fd.c | 351 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/fd.h | 14 ++ fs/proc/internal.h | 48 +++++++ 5 files changed, 416 insertions(+), 387 deletions(-) create mode 100644 fs/proc/fd.c create mode 100644 fs/proc/fd.h (limited to 'fs') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index c1c729335924..99349efbbc2b 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o + proc_tty.o fd.o proc-y += cmdline.o proc-y += consoles.o proc-y += cpuinfo.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b6c84cbdb73..b55c3bb298e3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -90,6 +90,7 @@ #endif #include #include "internal.h" +#include "fd.h" /* NOTE: * Implementing inode permission operations in /proc is almost @@ -136,8 +137,6 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) -static int proc_fd_permission(struct inode *inode, int mask); - /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -1492,7 +1491,7 @@ out: return error; } -static const struct inode_operations proc_pid_link_inode_operations = { +const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .follow_link = proc_pid_follow_link, .setattr = proc_setattr, @@ -1501,21 +1500,6 @@ static const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ -static int task_dumpable(struct task_struct *task) -{ - int dumpable = 0; - struct mm_struct *mm; - - task_lock(task); - mm = task->mm; - if (mm) - dumpable = get_dumpable(mm); - task_unlock(task); - if(dumpable == 1) - return 1; - return 0; -} - struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) { struct inode * inode; @@ -1641,15 +1625,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) return 0; } -static int pid_delete_dentry(const struct dentry * dentry) -{ - /* Is the task we represent dead? - * If so, then don't put the dentry on the lru list, - * kill it immediately. - */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; -} - const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, @@ -1712,289 +1687,6 @@ end_instantiate: return filldir(dirent, name, len, filp->f_pos, ino, type); } -static unsigned name_to_int(struct dentry *dentry) -{ - const char *name = dentry->d_name.name; - int len = dentry->d_name.len; - unsigned n = 0; - - if (len > 1 && *name == '0') - goto out; - while (len-- > 0) { - unsigned c = *name++ - '0'; - if (c > 9) - goto out; - if (n >= (~0U-9)/10) - goto out; - n *= 10; - n += c; - } - return n; -out: - return ~0U; -} - -#define PROC_FDINFO_MAX 64 - -static int proc_fd_info(struct inode *inode, struct path *path, char *info) -{ - struct task_struct *task = get_proc_task(inode); - struct files_struct *files = NULL; - struct file *file; - int fd = proc_fd(inode); - - if (task) { - files = get_files_struct(task); - put_task_struct(task); - } - if (files) { - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (file) { - unsigned int f_flags; - struct fdtable *fdt; - - fdt = files_fdtable(files); - f_flags = file->f_flags & ~O_CLOEXEC; - if (close_on_exec(fd, fdt)) - f_flags |= O_CLOEXEC; - - if (path) { - *path = file->f_path; - path_get(&file->f_path); - } - if (info) - snprintf(info, PROC_FDINFO_MAX, - "pos:\t%lli\n" - "flags:\t0%o\n", - (long long) file->f_pos, - f_flags); - spin_unlock(&files->file_lock); - put_files_struct(files); - return 0; - } - spin_unlock(&files->file_lock); - put_files_struct(files); - } - return -ENOENT; -} - -static int proc_fd_link(struct dentry *dentry, struct path *path) -{ - return proc_fd_info(dentry->d_inode, path, NULL); -} - -static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct inode *inode; - struct task_struct *task; - int fd; - struct files_struct *files; - const struct cred *cred; - - if (flags & LOOKUP_RCU) - return -ECHILD; - - inode = dentry->d_inode; - task = get_proc_task(inode); - fd = proc_fd(inode); - - if (task) { - files = get_files_struct(task); - if (files) { - struct file *file; - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - unsigned f_mode = file->f_mode; - - rcu_read_unlock(); - put_files_struct(files); - - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } - - if (S_ISLNK(inode->i_mode)) { - unsigned i_mode = S_IFLNK; - if (f_mode & FMODE_READ) - i_mode |= S_IRUSR | S_IXUSR; - if (f_mode & FMODE_WRITE) - i_mode |= S_IWUSR | S_IXUSR; - inode->i_mode = i_mode; - } - - security_task_to_inode(task, inode); - put_task_struct(task); - return 1; - } - rcu_read_unlock(); - put_files_struct(files); - } - put_task_struct(task); - } - d_drop(dentry); - return 0; -} - -static const struct dentry_operations tid_fd_dentry_operations = -{ - .d_revalidate = tid_fd_revalidate, - .d_delete = pid_delete_dentry, -}; - -static struct dentry *proc_fd_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - unsigned fd = (unsigned long)ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); - - inode = proc_pid_make_inode(dir->i_sb, task); - if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - - inode->i_mode = S_IFLNK; - inode->i_op = &proc_pid_link_inode_operations; - inode->i_size = 64; - ei->op.proc_get_link = proc_fd_link; - d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - error = NULL; - - out: - return error; -} - -static struct dentry *proc_lookupfd_common(struct inode *dir, - struct dentry *dentry, - instantiate_t instantiate) -{ - struct task_struct *task = get_proc_task(dir); - unsigned fd = name_to_int(dentry); - struct dentry *result = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - if (fd == ~0U) - goto out; - - result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); -out: - put_task_struct(task); -out_no_task: - return result; -} - -static int proc_readfd_common(struct file * filp, void * dirent, - filldir_t filldir, instantiate_t instantiate) -{ - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *p = get_proc_task(inode); - unsigned int fd, ino; - int retval; - struct files_struct * files; - - retval = -ENOENT; - if (!p) - goto out_no_task; - retval = 0; - - fd = filp->f_pos; - switch (fd) { - case 0: - if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - default: - files = get_files_struct(p); - if (!files) - goto out; - rcu_read_lock(); - for (fd = filp->f_pos-2; - fd < files_fdtable(files)->max_fds; - fd++, filp->f_pos++) { - char name[PROC_NUMBUF]; - int len; - int rv; - - if (!fcheck_files(files, fd)) - continue; - rcu_read_unlock(); - - len = snprintf(name, sizeof(name), "%d", fd); - rv = proc_fill_cache(filp, dirent, filldir, - name, len, instantiate, p, - (void *)(unsigned long)fd); - if (rv < 0) - goto out_fd_loop; - rcu_read_lock(); - } - rcu_read_unlock(); -out_fd_loop: - put_files_struct(files); - } -out: - put_task_struct(p); -out_no_task: - return retval; -} - -static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); -} - -static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); -} - -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); - if (!err) - err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); - return err; -} - -static const struct file_operations proc_fdinfo_file_operations = { - .open = nonseekable_open, - .read = proc_fdinfo_read, - .llseek = no_llseek, -}; - -static const struct file_operations proc_fd_operations = { - .read = generic_read_dir, - .readdir = proc_readfd, - .llseek = default_llseek, -}; - #ifdef CONFIG_CHECKPOINT_RESTORE /* @@ -2337,82 +2029,6 @@ static const struct file_operations proc_map_files_operations = { #endif /* CONFIG_CHECKPOINT_RESTORE */ -/* - * /proc/pid/fd needs a special permission handler so that a process can still - * access /proc/self/fd after it has executed a setuid(). - */ -static int proc_fd_permission(struct inode *inode, int mask) -{ - int rv = generic_permission(inode, mask); - if (rv == 0) - return 0; - if (task_pid(current) == proc_pid(inode)) - rv = 0; - return rv; -} - -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fd_inode_operations = { - .lookup = proc_lookupfd, - .permission = proc_fd_permission, - .setattr = proc_setattr, -}; - -static struct dentry *proc_fdinfo_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - unsigned fd = (unsigned long)ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); - - inode = proc_pid_make_inode(dir->i_sb, task); - if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - inode->i_mode = S_IFREG | S_IRUSR; - inode->i_fop = &proc_fdinfo_file_operations; - d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - error = NULL; - - out: - return error; -} - -static struct dentry *proc_lookupfdinfo(struct inode *dir, - struct dentry *dentry, - unsigned int flags) -{ - return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); -} - -static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, - proc_fdinfo_instantiate); -} - -static const struct file_operations proc_fdinfo_operations = { - .read = generic_read_dir, - .readdir = proc_readfdinfo, - .llseek = default_llseek, -}; - -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fdinfo_inode_operations = { - .lookup = proc_lookupfdinfo, - .setattr = proc_setattr, -}; - - static struct dentry *proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { diff --git a/fs/proc/fd.c b/fs/proc/fd.c new file mode 100644 index 000000000000..1b0f932b4bd9 --- /dev/null +++ b/fs/proc/fd.c @@ -0,0 +1,351 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "internal.h" +#include "fd.h" + +#define PROC_FDINFO_MAX 64 + +static int proc_fd_info(struct inode *inode, struct path *path, char *info) +{ + struct task_struct *task = get_proc_task(inode); + struct files_struct *files = NULL; + int fd = proc_fd(inode); + struct file *file; + + if (task) { + files = get_files_struct(task); + put_task_struct(task); + } + if (files) { + /* + * We are not taking a ref to the file structure, so we must + * hold ->file_lock. + */ + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { + unsigned int f_flags; + struct fdtable *fdt; + + fdt = files_fdtable(files); + f_flags = file->f_flags & ~O_CLOEXEC; + if (close_on_exec(fd, fdt)) + f_flags |= O_CLOEXEC; + + if (path) { + *path = file->f_path; + path_get(&file->f_path); + } + if (info) + snprintf(info, PROC_FDINFO_MAX, + "pos:\t%lli\n" + "flags:\t0%o\n", + (long long) file->f_pos, + f_flags); + spin_unlock(&files->file_lock); + put_files_struct(files); + return 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + return -ENOENT; +} + +static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct files_struct *files; + struct task_struct *task; + const struct cred *cred; + struct inode *inode; + int fd; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + fd = proc_fd(inode); + + if (task) { + files = get_files_struct(task); + if (files) { + struct file *file; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + unsigned f_mode = file->f_mode; + + rcu_read_unlock(); + put_files_struct(files); + + if (task_dumpable(task)) { + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); + } else { + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + } + + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + } + + security_task_to_inode(task, inode); + put_task_struct(task); + return 1; + } + rcu_read_unlock(); + put_files_struct(files); + } + put_task_struct(task); + } + + d_drop(dentry); + return 0; +} + +static const struct dentry_operations tid_fd_dentry_operations = { + .d_revalidate = tid_fd_revalidate, + .d_delete = pid_delete_dentry, +}; + +static int proc_fd_link(struct dentry *dentry, struct path *path) +{ + return proc_fd_info(dentry->d_inode, path, NULL); +} + +static struct dentry * +proc_fd_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + struct dentry *error = ERR_PTR(-ENOENT); + unsigned fd = (unsigned long)ptr; + struct proc_inode *ei; + struct inode *inode; + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + ei->fd = fd; + + inode->i_mode = S_IFLNK; + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + + ei->op.proc_get_link = proc_fd_link; + + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, 0)) + error = NULL; + out: + return error; +} + +static struct dentry *proc_lookupfd_common(struct inode *dir, + struct dentry *dentry, + instantiate_t instantiate) +{ + struct task_struct *task = get_proc_task(dir); + struct dentry *result = ERR_PTR(-ENOENT); + unsigned fd = name_to_int(dentry); + + if (!task) + goto out_no_task; + if (fd == ~0U) + goto out; + + result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); +out: + put_task_struct(task); +out_no_task: + return result; +} + +static int proc_readfd_common(struct file * filp, void * dirent, + filldir_t filldir, instantiate_t instantiate) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct task_struct *p = get_proc_task(inode); + struct files_struct *files; + unsigned int fd, ino; + int retval; + + retval = -ENOENT; + if (!p) + goto out_no_task; + retval = 0; + + fd = filp->f_pos; + switch (fd) { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + default: + files = get_files_struct(p); + if (!files) + goto out; + rcu_read_lock(); + for (fd = filp->f_pos - 2; + fd < files_fdtable(files)->max_fds; + fd++, filp->f_pos++) { + char name[PROC_NUMBUF]; + int len; + int rv; + + if (!fcheck_files(files, fd)) + continue; + rcu_read_unlock(); + + len = snprintf(name, sizeof(name), "%d", fd); + rv = proc_fill_cache(filp, dirent, filldir, + name, len, instantiate, p, + (void *)(unsigned long)fd); + if (rv < 0) + goto out_fd_loop; + rcu_read_lock(); + } + rcu_read_unlock(); +out_fd_loop: + put_files_struct(files); + } +out: + put_task_struct(p); +out_no_task: + return retval; +} + +static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + char tmp[PROC_FDINFO_MAX]; + int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); + if (!err) + err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); + return err; +} + +static const struct file_operations proc_fdinfo_file_operations = { + .open = nonseekable_open, + .read = proc_fdinfo_read, + .llseek = no_llseek, +}; + +static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); +} + +const struct file_operations proc_fd_operations = { + .read = generic_read_dir, + .readdir = proc_readfd, + .llseek = default_llseek, +}; + +static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); +} + +/* + * /proc/pid/fd needs a special permission handler so that a process can still + * access /proc/self/fd after it has executed a setuid(). + */ +int proc_fd_permission(struct inode *inode, int mask) +{ + int rv = generic_permission(inode, mask); + if (rv == 0) + return 0; + if (task_pid(current) == proc_pid(inode)) + rv = 0; + return rv; +} + +const struct inode_operations proc_fd_inode_operations = { + .lookup = proc_lookupfd, + .permission = proc_fd_permission, + .setattr = proc_setattr, +}; + +static struct dentry * +proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + struct dentry *error = ERR_PTR(-ENOENT); + unsigned fd = (unsigned long)ptr; + struct proc_inode *ei; + struct inode *inode; + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + ei->fd = fd; + + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_fop = &proc_fdinfo_file_operations; + + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, 0)) + error = NULL; + out: + return error; +} + +static struct dentry * +proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); +} + +static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, + proc_fdinfo_instantiate); +} + +const struct inode_operations proc_fdinfo_inode_operations = { + .lookup = proc_lookupfdinfo, + .setattr = proc_setattr, +}; + +const struct file_operations proc_fdinfo_operations = { + .read = generic_read_dir, + .readdir = proc_readfdinfo, + .llseek = default_llseek, +}; diff --git a/fs/proc/fd.h b/fs/proc/fd.h new file mode 100644 index 000000000000..cbb1d47deda8 --- /dev/null +++ b/fs/proc/fd.h @@ -0,0 +1,14 @@ +#ifndef __PROCFS_FD_H__ +#define __PROCFS_FD_H__ + +#include + +extern const struct file_operations proc_fd_operations; +extern const struct inode_operations proc_fd_inode_operations; + +extern const struct file_operations proc_fdinfo_operations; +extern const struct inode_operations proc_fdinfo_inode_operations; + +extern int proc_fd_permission(struct inode *inode, int mask); + +#endif /* __PROCFS_FD_H__ */ diff --git a/fs/proc/internal.h b/fs/proc/internal.h index e1167a1c9126..67925a7bd8cb 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include struct ctl_table_header; @@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +extern const struct inode_operations proc_pid_link_inode_operations; struct proc_maps_private { struct pid *pid; @@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode) return PROC_I(inode)->fd; } +static inline int task_dumpable(struct task_struct *task) +{ + int dumpable = 0; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) + dumpable = get_dumpable(mm); + task_unlock(task); + if(dumpable == 1) + return 1; + return 0; +} + +static inline int pid_delete_dentry(const struct dentry * dentry) +{ + /* Is the task we represent dead? + * If so, then don't put the dentry on the lru list, + * kill it immediately. + */ + return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; +} + +static inline unsigned name_to_int(struct dentry *dentry) +{ + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + while (len-- > 0) { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } + return n; +out: + return ~0U; +} + struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, -- cgit v1.2.3 From ddd3e0771bc7b869c550687c204e21f0155d5496 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 26 Aug 2012 18:28:20 +0400 Subject: procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2 This patch converts /proc/pid/fdinfo/ handling routines to seq-file which is needed to extend seq operations and plug in auxiliary fdinfo provides from subsystems like eventfd/eventpoll/fsnotify. Note the proc_fd_link no longer call for proc_fd_info, simply because the guts of proc_fd_info() got merged into ->show() of that seq_file Signed-off-by: Al Viro --- fs/proc/fd.c | 112 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 64 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 1b0f932b4bd9..9cef449c0f76 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -6,61 +6,68 @@ #include #include #include +#include +#include #include #include "internal.h" #include "fd.h" -#define PROC_FDINFO_MAX 64 - -static int proc_fd_info(struct inode *inode, struct path *path, char *info) +static int seq_show(struct seq_file *m, void *v) { - struct task_struct *task = get_proc_task(inode); struct files_struct *files = NULL; - int fd = proc_fd(inode); - struct file *file; + int f_flags = 0, ret = -ENOENT; + struct file *file = NULL; + struct task_struct *task; + + task = get_proc_task(m->private); + if (!task) + return -ENOENT; + + files = get_files_struct(task); + put_task_struct(task); - if (task) { - files = get_files_struct(task); - put_task_struct(task); - } if (files) { - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ + int fd = proc_fd(m->private); + spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { - unsigned int f_flags; - struct fdtable *fdt; + struct fdtable *fdt = files_fdtable(files); - fdt = files_fdtable(files); f_flags = file->f_flags & ~O_CLOEXEC; if (close_on_exec(fd, fdt)) f_flags |= O_CLOEXEC; - if (path) { - *path = file->f_path; - path_get(&file->f_path); - } - if (info) - snprintf(info, PROC_FDINFO_MAX, - "pos:\t%lli\n" - "flags:\t0%o\n", - (long long) file->f_pos, - f_flags); - spin_unlock(&files->file_lock); - put_files_struct(files); - return 0; + get_file(file); + ret = 0; } spin_unlock(&files->file_lock); put_files_struct(files); } - return -ENOENT; + + if (!ret) { + seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", + (long long)file->f_pos, f_flags); + fput(file); + } + + return ret; } +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, seq_show, inode); +} + +static const struct file_operations proc_fdinfo_file_operations = { + .open = seq_fdinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { struct files_struct *files; @@ -130,7 +137,32 @@ static const struct dentry_operations tid_fd_dentry_operations = { static int proc_fd_link(struct dentry *dentry, struct path *path) { - return proc_fd_info(dentry->d_inode, path, NULL); + struct files_struct *files = NULL; + struct task_struct *task; + int ret = -ENOENT; + + task = get_proc_task(dentry->d_inode); + if (task) { + files = get_files_struct(task); + put_task_struct(task); + } + + if (files) { + int fd = proc_fd(dentry->d_inode); + struct file *fd_file; + + spin_lock(&files->file_lock); + fd_file = fcheck_files(files, fd); + if (fd_file) { + *path = fd_file->f_path; + path_get(&fd_file->f_path); + ret = 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + + return ret; } static struct dentry * @@ -245,22 +277,6 @@ out_no_task: return retval; } -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); - if (!err) - err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); - return err; -} - -static const struct file_operations proc_fdinfo_file_operations = { - .open = nonseekable_open, - .read = proc_fdinfo_read, - .llseek = no_llseek, -}; - static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) { return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); -- cgit v1.2.3 From c6f3d81115989e274c42a852222b80d2e14ced6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 11:01:04 -0400 Subject: don't leak O_CLOEXEC into ->f_flags Signed-off-by: Al Viro --- fs/open.c | 2 +- fs/proc/fd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 30760017deed..03028d0e7487 100644 --- a/fs/open.c +++ b/fs/open.c @@ -814,7 +814,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o op->mode = 0; /* Must never be set by userspace */ - flags &= ~FMODE_NONOTIFY; + flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 9cef449c0f76..f28a875f8779 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -36,7 +36,7 @@ static int seq_show(struct seq_file *m, void *v) if (file) { struct fdtable *fdt = files_fdtable(files); - f_flags = file->f_flags & ~O_CLOEXEC; + f_flags = file->f_flags; if (close_on_exec(fd, fdt)) f_flags |= O_CLOEXEC; -- cgit v1.2.3 From f6d2ac5ca79d1fd468dbc77e488aec06e86f3035 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 12:55:54 -0400 Subject: namei.c: fix BS comment get_write_access() is needed for nfsd, not binfmt_aout (the latter has no business doing anything of that kind, of course) Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index dd1ed1b8e98e..c5b85b3523c0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3971,7 +3971,7 @@ EXPORT_SYMBOL(user_path_at); EXPORT_SYMBOL(follow_down_one); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); -EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ +EXPORT_SYMBOL(get_write_access); /* nfsd */ EXPORT_SYMBOL(getname); EXPORT_SYMBOL(lock_rename); EXPORT_SYMBOL(lookup_one_len); -- cgit v1.2.3 From bf2965d5b5950d09e934ea5d961d79d0ed1fae7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:13:36 -0400 Subject: switch ftruncate(2) to fget_light Signed-off-by: Al Viro --- fs/open.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 03028d0e7487..9f61d7269d39 100644 --- a/fs/open.c +++ b/fs/open.c @@ -132,16 +132,16 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { - struct inode * inode; + struct inode *inode; struct dentry *dentry; - struct file * file; - int error; + struct file *file; + int error, fput_needed; error = -EINVAL; if (length < 0) goto out; error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) goto out; @@ -172,7 +172,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); sb_end_write(inode->i_sb); out_putf: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3 From 6b48c5b2079af1f81d8f249ae07a988d8c45b32f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:15:40 -0400 Subject: switch fallocate(2) to fget_light() Signed-off-by: Al Viro --- fs/open.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 9f61d7269d39..da6d3f1ac243 100644 --- a/fs/open.c +++ b/fs/open.c @@ -277,12 +277,12 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) { struct file *file; - int error = -EBADF; + int error = -EBADF, fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (file) { error = do_fallocate(file, mode, offset, len); - fput(file); + fput_light(file, fput_needed); } return error; -- cgit v1.2.3 From d6483b7a78438bc333560d11b69e6a6a6cf55940 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:22:10 -0400 Subject: switch fchmod(2) to fget_light() Signed-off-by: Al Viro --- fs/open.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index da6d3f1ac243..3c741eae6b99 100644 --- a/fs/open.c +++ b/fs/open.c @@ -582,23 +582,21 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { - struct file * file; - int error = -EBADF; - struct dentry * dentry; + struct file *file; + int error = -EBADF, fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) goto out; error = mnt_want_write_file(file); if (error) goto out_fput; - dentry = file->f_path.dentry; - audit_inode(NULL, dentry); + audit_inode(NULL, file->f_path.dentry); error = chown_common(&file->f_path, user, group); mnt_drop_write_file(file); out_fput: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3 From 399c9b862f853f5c33e5a3b1f9a3c2507bdd526b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:00:03 -0400 Subject: ext4: close struct file leak on EXT4_IOC_MOVE_EXT Signed-off-by: Al Viro --- fs/ext4/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7f7dad787603..a0ee682dc394 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -258,7 +258,8 @@ group_extend_out: EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto mext_out; } err = mnt_want_write_file(filp); -- cgit v1.2.3 From 4557c669ef9801d96cf663331cdd1dcb8fa9c2f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Aug 2012 10:19:41 -0400 Subject: export fget_light Signed-off-by: Al Viro --- fs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 15750b80e3ce..0f1bda4bebfa 100644 --- a/fs/file.c +++ b/fs/file.c @@ -797,6 +797,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } +EXPORT_SYMBOL(fget_light); struct file *fget_raw_light(unsigned int fd, int *fput_needed) { -- cgit v1.2.3 From 6bdf2954016ef7c1f4d4fa07a338ee197d9c3506 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:01:46 -0400 Subject: switch EXT4_IOC_MOVE_EXT to fget_light() Signed-off-by: Al Viro --- fs/ext4/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a0ee682dc394..39646f224514 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -234,7 +234,7 @@ group_extend_out: case EXT4_IOC_MOVE_EXT: { struct move_extent me; struct file *donor_filp; - int err; + int err, fput_needed; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) @@ -245,7 +245,7 @@ group_extend_out: return -EFAULT; me.moved_len = 0; - donor_filp = fget(me.donor_fd); + donor_filp = fget_light(me.donor_fd, &fput_needed); if (!donor_filp) return -EBADF; @@ -274,7 +274,7 @@ group_extend_out: &me, sizeof(me))) err = -EFAULT; mext_out: - fput(donor_filp); + fput_light(donor_filp, fput_needed); return err; } -- cgit v1.2.3 From ecd188159efa112770d5f8a4a62f8d3586784f48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:20:24 -0400 Subject: switch btrfs_ioctl_snap_create_transid() to fget_light() Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9df50fa8a078..3c88abb0e265 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1422,7 +1422,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, NULL, transid, readonly, inherit); } else { struct inode *src_inode; - src_file = fget(fd); + int fput_needed; + src_file = fget_light(fd, &fput_needed); if (!src_file) { ret = -EINVAL; goto out_drop_write; @@ -1433,13 +1434,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); ret = -EINVAL; - fput(src_file); - goto out_drop_write; + } else { + ret = btrfs_mksubvol(&file->f_path, name, namelen, + BTRFS_I(src_inode)->root, + transid, readonly, inherit); } - ret = btrfs_mksubvol(&file->f_path, name, namelen, - BTRFS_I(src_inode)->root, - transid, readonly, inherit); - fput(src_file); + fput_light(src_file, fput_needed); } out_drop_write: mnt_drop_write_file(file); -- cgit v1.2.3 From 5e196a9cf557dbc2bf808824c6c4998150e18d06 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:27:40 -0400 Subject: switch epoll_wait(2) to fget_light() Signed-off-by: Al Viro --- fs/eventpoll.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eedec84c1809..567ae72e155d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1809,7 +1809,7 @@ error_return: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - int error; + int error, fput_needed; struct file *file; struct eventpoll *ep; @@ -1825,7 +1825,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, /* Get the "struct file *" for the eventpoll file */ error = -EBADF; - file = fget(epfd); + file = fget_light(epfd, &fput_needed); if (!file) goto error_return; @@ -1847,7 +1847,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, error = ep_poll(ep, events, maxevents, timeout); error_fput: - fput(file); + fput_light(file, fput_needed); error_return: return error; -- cgit v1.2.3 From 4109633f4c4dcdaedf0d85ae74dba334760c577b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:32:02 -0400 Subject: switch timerfd_[sg]ettime(2) to fget_light() Signed-off-by: Al Viro --- fs/timerfd.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/timerfd.c b/fs/timerfd.c index dffeb3795af1..dd91e9420c9e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -234,15 +234,15 @@ static const struct file_operations timerfd_fops = { .llseek = noop_llseek, }; -static struct file *timerfd_fget(int fd) +static struct file *timerfd_fget(int fd, int *fput_needed) { struct file *file; - file = fget(fd); + file = fget_light(fd, fput_needed); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &timerfd_fops) { - fput(file); + fput_light(file, *fput_needed); return ERR_PTR(-EINVAL); } @@ -287,7 +287,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, struct file *file; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; - int ret; + int ret, fput_needed; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -297,7 +297,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, !timespec_valid(&ktmr.it_interval)) return -EINVAL; - file = timerfd_fget(ufd); + file = timerfd_fget(ufd, &fput_needed); if (IS_ERR(file)) return PTR_ERR(file); ctx = file->private_data; @@ -334,7 +334,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); - fput(file); + fput_light(file, fput_needed); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; @@ -346,8 +346,9 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) struct file *file; struct timerfd_ctx *ctx; struct itimerspec kotmr; + int fput_needed; - file = timerfd_fget(ufd); + file = timerfd_fget(ufd, &fput_needed); if (IS_ERR(file)) return PTR_ERR(file); ctx = file->private_data; @@ -362,7 +363,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); kotmr.it_interval = ktime_to_timespec(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); - fput(file); + fput_light(file, fput_needed); return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; } -- cgit v1.2.3 From 8319aa9127a1282b24c3ece473a058d246f35b0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 03:18:55 -0400 Subject: switch btrfs_ioctl_clone() to fget_light() Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3c88abb0e265..3494f2f44167 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2350,7 +2350,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_key key; u32 nritems; int slot; - int ret; + int ret, fput_needed; u64 len = olen; u64 bs = root->fs_info->sb->s_blocksize; u64 hint_byte; @@ -2376,7 +2376,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (ret) return ret; - src_file = fget(srcfd); + src_file = fget_light(srcfd, &fput_needed); if (!src_file) { ret = -EBADF; goto out_drop_write; @@ -2724,7 +2724,7 @@ out_unlock: vfree(buf); btrfs_free_path(path); out_fput: - fput(src_file); + fput_light(src_file, fput_needed); out_drop_write: mnt_drop_write_file(file); return ret; -- cgit v1.2.3 From 78f7d75e5dd9aa1027e90d0b71d394603933c2ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 12:54:13 -0400 Subject: switch coda get_device_index() to fget_light() Signed-off-by: Al Viro --- fs/coda/inode.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/coda/inode.c b/fs/coda/inode.c index f1813120d753..bd2313d106e5 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -109,41 +109,39 @@ static int get_device_index(struct coda_mount_data *data) { struct file *file; struct inode *inode; - int idx; + int idx, fput_needed; - if(data == NULL) { + if (data == NULL) { printk("coda_read_super: Bad mount data\n"); return -1; } - if(data->version != CODA_MOUNT_VERSION) { + if (data->version != CODA_MOUNT_VERSION) { printk("coda_read_super: Bad mount version\n"); return -1; } - file = fget(data->fd); - inode = NULL; - if(file) - inode = file->f_path.dentry->d_inode; - - if(!inode || !S_ISCHR(inode->i_mode) || - imajor(inode) != CODA_PSDEV_MAJOR) { - if(file) - fput(file); - - printk("coda_read_super: Bad file\n"); - return -1; + file = fget_light(data->fd, &fput_needed); + if (!file) + goto Ebadf; + inode = file->f_path.dentry->d_inode; + if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { + fput_light(file, fput_needed); + goto Ebadf; } idx = iminor(inode); - fput(file); + fput_light(file, fput_needed); - if(idx < 0 || idx >= MAX_CODADEVS) { + if (idx < 0 || idx >= MAX_CODADEVS) { printk("coda_read_super: Bad minor number\n"); return -1; } return idx; +Ebadf: + printk("coda_read_super: Bad file\n"); + return -1; } static int coda_fill_super(struct super_block *sb, void *data, int silent) -- cgit v1.2.3 From 1ea65c96077f9bb5c0e5e224a4da751d269c5f94 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 12:57:12 -0400 Subject: switch xfs_swapext() to fget_light() Signed-off-by: Al Viro --- fs/xfs/xfs_dfrag.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e00de08dc8ac..e6cdf224d7ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -49,10 +49,10 @@ xfs_swapext( { xfs_inode_t *ip, *tip; struct file *file, *tmp_file; - int error = 0; + int error = 0, fput_needed, fput_needed_tmp; /* Pull information for the target fd */ - file = fget((int)sxp->sx_fdtarget); + file = fget_light((int)sxp->sx_fdtarget, &fput_needed); if (!file) { error = XFS_ERROR(EINVAL); goto out; @@ -65,7 +65,7 @@ xfs_swapext( goto out_put_file; } - tmp_file = fget((int)sxp->sx_fdtmp); + tmp_file = fget_light((int)sxp->sx_fdtmp, &fput_needed_tmp); if (!tmp_file) { error = XFS_ERROR(EINVAL); goto out_put_file; @@ -105,9 +105,9 @@ xfs_swapext( error = xfs_swap_extents(ip, tip, sxp); out_put_tmp_file: - fput(tmp_file); + fput_light(tmp_file, fput_needed_tmp); out_put_file: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3 From 64e09fa2e1fef1696a8685c7aad7e0d3dd24ce71 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 12:59:52 -0400 Subject: switch xfs_find_handle() to fget_light() Signed-off-by: Al Viro --- fs/xfs/xfs_ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0e0232c3b6d9..21483eac402d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -72,11 +72,11 @@ xfs_find_handle( struct inode *inode; struct file *file = NULL; struct path path; - int error; + int error, fput_needed; struct xfs_inode *ip; if (cmd == XFS_IOC_FD_TO_HANDLE) { - file = fget(hreq->fd); + file = fget_light(hreq->fd, &fput_needed); if (!file) return -EBADF; inode = file->f_path.dentry->d_inode; @@ -134,7 +134,7 @@ xfs_find_handle( out_put: if (cmd == XFS_IOC_FD_TO_HANDLE) - fput(file); + fput_light(file, fput_needed); else path_put(&path); return error; -- cgit v1.2.3 From cb0942b81249798e15c3f04eee2946ef543e8115 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 14:48:26 -0400 Subject: make get_file() return its argument simplifies a bunch of callers... Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 4 +--- drivers/base/dma-buf.c | 3 +-- drivers/staging/omapdrm/omap_gem.c | 3 +-- drivers/tty/tty_io.c | 9 +++------ fs/autofs4/waitq.c | 3 +-- fs/fuse/dev.c | 3 +-- fs/nfsd/nfs4state.c | 3 +-- fs/proc/base.c | 3 +-- fs/select.c | 3 +-- include/linux/fs.h | 6 +++++- mm/fremap.c | 3 +-- mm/mmap.c | 3 +-- mm/nommu.c | 6 ++---- net/compat.c | 3 +-- net/core/scm.c | 3 +-- security/selinux/hooks.c | 3 +-- 16 files changed, 23 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 79826c13b8b6..ff5d4e4c3733 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2306,7 +2306,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t * partially initialize the vma for the sampling buffer */ vma->vm_mm = mm; - vma->vm_file = filp; + vma->vm_file = get_file(filp); vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ @@ -2345,8 +2345,6 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t goto error; } - get_file(filp); - /* * now insert the vma in the vm list for the process, must be * done with mmap lock held diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index c30f3e1d0efc..460e22dee36d 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -460,8 +460,7 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, if (vma->vm_file) fput(vma->vm_file); - vma->vm_file = dmabuf->file; - get_file(vma->vm_file); + vma->vm_file = get_file(dmabuf->file); vma->vm_pgoff = pgoff; diff --git a/drivers/staging/omapdrm/omap_gem.c b/drivers/staging/omapdrm/omap_gem.c index 3a0d035a9e03..2a6bb7f9ee68 100644 --- a/drivers/staging/omapdrm/omap_gem.c +++ b/drivers/staging/omapdrm/omap_gem.c @@ -566,9 +566,8 @@ int omap_gem_mmap_obj(struct drm_gem_object *obj, * in particular in the case of mmap'd dmabufs) */ fput(vma->vm_file); - get_file(obj->filp); vma->vm_pgoff = 0; - vma->vm_file = obj->filp; + vma->vm_file = get_file(obj->filp); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 71d95cfbabec..c7561f29d894 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1163,10 +1163,8 @@ ssize_t redirected_tty_write(struct file *file, const char __user *buf, struct file *p = NULL; spin_lock(&redirect_lock); - if (redirect) { - get_file(redirect); - p = redirect; - } + if (redirect) + p = get_file(redirect); spin_unlock(&redirect_lock); if (p) { @@ -2246,8 +2244,7 @@ static int tioccons(struct file *file) spin_unlock(&redirect_lock); return -EBUSY; } - get_file(file); - redirect = file; + redirect = get_file(file); spin_unlock(&redirect_lock); return 0; } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index da8876d38a7b..dce436e595c1 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, return; } - pipe = sbi->pipe; - get_file(pipe); + pipe = get_file(sbi->pipe); mutex_unlock(&sbi->wq_mutex); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f4246cfc8d87..8c23fa7a91e6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc, if (ff->reserved_req) { req = ff->reserved_req; ff->reserved_req = NULL; - get_file(file); - req->stolen_file = file; + req->stolen_file = get_file(file); } spin_unlock(&fc->lock); } while (!req); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc894eda385a..48a1bad37334 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) return -ENOMEM; } fp->fi_lease = fl; - fp->fi_deleg_file = fl->fl_file; - get_file(fp->fi_deleg_file); + fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); list_add(&dp->dl_perfile, &fp->fi_delegations); return 0; diff --git a/fs/proc/base.c b/fs/proc/base.c index b55c3bb298e3..f1e8438d21b5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1979,8 +1979,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) if (++pos <= filp->f_pos) continue; - get_file(vma->vm_file); - info.file = vma->vm_file; + info.file = get_file(vma->vm_file); info.len = snprintf(info.name, sizeof(info.name), "%lx-%lx", vma->vm_start, vma->vm_end); diff --git a/fs/select.c b/fs/select.c index db14c781335e..ffdd16d6e691 100644 --- a/fs/select.c +++ b/fs/select.c @@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; - get_file(filp); - entry->filp = filp; + entry->filp = get_file(filp); entry->wait_address = wait_address; entry->key = p->_key; init_waitqueue_func_entry(&entry->wait, pollwake); diff --git a/include/linux/fs.h b/include/linux/fs.h index aa110476a95b..de1db1c12080 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1074,7 +1074,11 @@ struct file_handle { unsigned char f_handle[0]; }; -#define get_file(x) atomic_long_inc(&(x)->f_count) +static inline struct file *get_file(struct file *f) +{ + atomic_long_inc(&f->f_count); + return f; +} #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) diff --git a/mm/fremap.c b/mm/fremap.c index 9ed4fd432467..048659c0c03d 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -195,10 +195,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, */ if (mapping_cap_account_dirty(mapping)) { unsigned long addr; - struct file *file = vma->vm_file; + struct file *file = get_file(vma->vm_file); flags &= MAP_NONBLOCK; - get_file(file); addr = mmap_region(file, start, size, flags, vma->vm_flags, pgoff); fput(file); diff --git a/mm/mmap.c b/mm/mmap.c index ae18a48e7e4e..872441e81914 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1301,8 +1301,7 @@ munmap_back: goto free_vma; correct_wcount = 1; } - vma->vm_file = file; - get_file(file); + vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); if (error) goto unmap_and_free_vma; diff --git a/mm/nommu.c b/mm/nommu.c index d4b0c10872de..dee2ff89fd58 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1282,10 +1282,8 @@ unsigned long do_mmap_pgoff(struct file *file, vma->vm_pgoff = pgoff; if (file) { - region->vm_file = file; - get_file(file); - vma->vm_file = file; - get_file(file); + region->vm_file = get_file(file); + vma->vm_file = get_file(file); if (vm_flags & VM_EXECUTABLE) { added_exe_file_vma(current->mm); vma->vm_mm = current->mm; diff --git a/net/compat.c b/net/compat.c index 74ed1d7a84a2..79ae88485001 100644 --- a/net/compat.c +++ b/net/compat.c @@ -301,8 +301,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) { diff --git a/net/core/scm.c b/net/core/scm.c index 040cebeed45b..b0098d259233 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -281,11 +281,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); sock = sock_from_file(fp[i], &err); if (sock) sock_update_netprioidx(sock->sk, current); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4dfbcea10eb7..651d8456611a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2135,8 +2135,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (!IS_ERR(devnull)) { /* replace all the matching ones with this */ do { - get_file(devnull); - replace_fd(n - 1, devnull, 0); + replace_fd(n - 1, get_file(devnull), 0); } while ((n = iterate_fd(files, n, match_file, cred)) != 0); fput(devnull); } else { -- cgit v1.2.3 From 7b540d0646ce122f0ba4520412be91e530719742 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 14:55:26 -0400 Subject: proc_map_files_readdir(): don't bother with grabbing files all we need is their ->f_mode, so just collect _that_ Signed-off-by: Al Viro --- fs/proc/base.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index f1e8438d21b5..df18db61d6d8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1805,7 +1805,7 @@ out: } struct map_files_info { - struct file *file; + fmode_t mode; unsigned long len; unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; @@ -1814,13 +1814,10 @@ static struct dentry * proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - const struct file *file = ptr; + fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; - if (!file) - return ERR_PTR(-ENOENT); - inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) return ERR_PTR(-ENOENT); @@ -1832,9 +1829,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode->i_size = 64; inode->i_mode = S_IFLNK; - if (file->f_mode & FMODE_READ) + if (mode & FMODE_READ) inode->i_mode |= S_IRUSR; - if (file->f_mode & FMODE_WRITE) + if (mode & FMODE_WRITE) inode->i_mode |= S_IWUSR; d_set_d_op(dentry, &tid_map_files_dentry_operations); @@ -1878,7 +1875,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!vma) goto out_no_vma; - result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); + result = proc_map_files_instantiate(dir, dentry, task, + (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: up_read(&mm->mmap_sem); @@ -1979,7 +1977,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) if (++pos <= filp->f_pos) continue; - info.file = get_file(vma->vm_file); + info.mode = vma->vm_file->f_mode; info.len = snprintf(info.name, sizeof(info.name), "%lx-%lx", vma->vm_start, vma->vm_end); @@ -1994,19 +1992,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = proc_fill_cache(filp, dirent, filldir, p->name, p->len, proc_map_files_instantiate, - task, p->file); + task, + (void *)(unsigned long)p->mode); if (ret) break; filp->f_pos++; - fput(p->file); - } - for (; i < nr_files; i++) { - /* - * In case of error don't forget - * to put rest of file refs. - */ - p = flex_array_get(fa, i); - fput(p->file); } if (fa) flex_array_free(fa); -- cgit v1.2.3 From 2a117354b7bdfe13750a64307755e75436fb157a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 17:55:17 -0400 Subject: switch o2hb_region_dev_write() to fget_light() Signed-off-by: Al Viro --- fs/ocfs2/cluster/heartbeat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a4e855e3690e..61c28ae266f5 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1750,6 +1750,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, struct inode *inode = NULL; ssize_t ret = -EINVAL; int live_threshold; + int fput_needed; if (reg->hr_bdev) goto out; @@ -1766,7 +1767,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (fd < 0 || fd >= INT_MAX) goto out; - filp = fget(fd); + filp = fget_light(fd, &fput_needed); if (filp == NULL) goto out; @@ -1884,7 +1885,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, out: if (filp) - fput(filp); + fput_light(filp, fput_needed); if (inode) iput(inode); if (ret < 0) { -- cgit v1.2.3 From 6a08f447facb4f9e29fcc30fb68060bb5a0d21c2 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Wed, 26 Sep 2012 21:24:57 -0400 Subject: ext4: always set i_op in ext4_mknod() ext4_special_inode_operations have their own ifdef CONFIG_EXT4_FS_XATTR to mask those methods. And ext4_iget also always sets it, so there is an inconsistency. Signed-off-by: Bernd Schubert Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/namei.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8f4bda75122e..bd87b7a66afb 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2156,9 +2156,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT4_FS_XATTR inode->i_op = &ext4_special_inode_operations; -#endif err = ext4_add_nondir(handle, dentry, inode); } ext4_journal_stop(handle); -- cgit v1.2.3 From b71fc079b5d8f42b2a52743c8d2f1d35d655b1c5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 26 Sep 2012 21:52:20 -0400 Subject: ext4: fix fdatasync() for files with only i_size changes Code tracking when transaction needs to be committed on fdatasync(2) forgets to handle a situation when only inode's i_size is changed. Thus in such situations fdatasync(2) doesn't force transaction with new i_size to disk and that can result in wrong i_size after a crash. Fix the issue by updating inode's i_datasync_tid whenever its size is updated. CC: # >= 2.6.32 Reported-by: Kristian Nielsen Signed-off-by: Jan Kara --- fs/ext4/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0a31197590d7..4df5e95801b4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4054,6 +4054,7 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; uid_t i_uid; gid_t i_gid; @@ -4104,7 +4105,10 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - ext4_isize_set(raw_inode, ei->i_disksize); + if (ei->i_disksize != ext4_isize(raw_inode)) { + ext4_isize_set(raw_inode, ei->i_disksize); + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, @@ -4157,7 +4161,7 @@ static int ext4_do_update_inode(handle_t *handle, err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); - ext4_update_inode_fsync_trans(handle, inode, 0); + ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: brelse(bh); ext4_std_error(inode->i_sb, err); -- cgit v1.2.3 From 2903ff019b346ab8d36ebbf54853c3aaf6590608 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Aug 2012 12:52:22 -0400 Subject: switch simple cases of fget_light to fdget Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 15 +-- arch/ia64/kernel/perfmon.c | 15 ++- arch/parisc/hpux/fs.c | 17 ++- arch/powerpc/platforms/cell/spu_syscalls.c | 21 ++-- drivers/infiniband/core/ucma.c | 12 +- drivers/infiniband/core/uverbs_cmd.c | 18 +-- drivers/infiniband/core/uverbs_main.c | 12 +- drivers/vfio/vfio.c | 17 ++- drivers/video/msm/mdp.c | 12 +- fs/btrfs/ioctl.c | 26 ++--- fs/coda/inode.c | 14 +-- fs/compat.c | 90 +++++++-------- fs/compat_ioctl.c | 27 ++--- fs/eventpoll.c | 25 ++-- fs/ext4/ioctl.c | 14 +-- fs/fcntl.c | 32 +++--- fs/fhandle.c | 17 ++- fs/ioctl.c | 25 ++-- fs/locks.c | 20 ++-- fs/namei.c | 39 +++---- fs/notify/fanotify/fanotify_user.c | 28 +++-- fs/notify/inotify/inotify_user.c | 28 ++--- fs/ocfs2/cluster/heartbeat.c | 39 ++++--- fs/open.c | 64 +++++------ fs/read_write.c | 176 +++++++++++++---------------- fs/readdir.c | 36 +++--- fs/select.c | 28 ++--- fs/signalfd.c | 13 +-- fs/splice.c | 69 ++++++----- fs/stat.c | 10 +- fs/statfs.c | 9 +- fs/sync.c | 33 +++--- fs/timerfd.c | 48 ++++---- fs/utimes.c | 11 +- fs/xattr.c | 52 ++++----- fs/xfs/xfs_dfrag.c | 36 +++--- fs/xfs/xfs_ioctl.c | 12 +- include/linux/file.h | 5 +- ipc/mqueue.c | 84 +++++++------- kernel/events/core.c | 70 +++++------- kernel/sys.c | 16 +-- kernel/taskstats.c | 11 +- mm/fadvise.c | 35 +++--- mm/readahead.c | 15 ++- 44 files changed, 633 insertions(+), 763 deletions(-) (limited to 'fs') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d6c49e67d3fc..f1daf7ae42e9 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -144,28 +144,25 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, struct osf_dirent __user *, dirent, unsigned int, count, long __user *, basep) { - int error, fput_needed; - struct file *file; + int error; + struct fd arg = fdget(fd); struct osf_dirent_callback buf; - error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) - goto out; + if (!arg.file) + return -EBADF; buf.dirent = dirent; buf.basep = basep; buf.count = count; buf.error = 0; - error = vfs_readdir(file, osf_filldir, &buf); + error = vfs_readdir(arg.file, osf_filldir, &buf); if (error >= 0) error = buf.error; if (count != buf.count) error = count - buf.count; - fput_light(file, fput_needed); - out: + fdput(arg); return error; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index ff5d4e4c3733..e3bd7b8aceab 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -4780,7 +4780,7 @@ recheck: asmlinkage long sys_perfmonctl (int fd, int cmd, void __user *arg, int count) { - struct file *file = NULL; + struct fd f = {NULL, 0}; pfm_context_t *ctx = NULL; unsigned long flags = 0UL; void *args_k = NULL; @@ -4789,7 +4789,6 @@ sys_perfmonctl (int fd, int cmd, void __user *arg, int count) int narg, completed_args = 0, call_made = 0, cmd_flags; int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); int (*getsize)(void *arg, size_t *sz); - int fput_needed; #define PFM_MAX_ARGSIZE 4096 /* @@ -4878,17 +4877,17 @@ restart_args: ret = -EBADF; - file = fget_light(fd, &fput_needed); - if (unlikely(file == NULL)) { + f = fdget(fd); + if (unlikely(f.file == NULL)) { DPRINT(("invalid fd %d\n", fd)); goto error_args; } - if (unlikely(PFM_IS_FILE(file) == 0)) { + if (unlikely(PFM_IS_FILE(f.file) == 0)) { DPRINT(("fd %d not related to perfmon\n", fd)); goto error_args; } - ctx = file->private_data; + ctx = f.file->private_data; if (unlikely(ctx == NULL)) { DPRINT(("no context for fd %d\n", fd)); goto error_args; @@ -4918,8 +4917,8 @@ abort_locked: if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; error_args: - if (file) - fput_light(file, fput_needed); + if (f.file) + fdput(f); kfree(args_k); diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 41e01832cb21..6785de7bd2a0 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -109,33 +109,32 @@ Efault: int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned int count) { - struct file * file; + struct fd arg; struct hpux_dirent __user * lastdirent; struct getdents_callback buf; - int error = -EBADF, fput_needed; + int error; - file = fget_light(fd, &fput_needed); - if (!file) - goto out; + arg = fdget(fd); + if (!arg.file) + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; buf.count = count; buf.error = 0; - error = vfs_readdir(file, filldir, &buf); + error = vfs_readdir(arg.file, filldir, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(file->f_pos, &lastdirent->d_off)) + if (put_user(arg.file->f_pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); -out: + fdput(arg); return error; } diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 714bbfc3162c..db4e638cf408 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -69,8 +69,6 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, umode_t, mode, int, neighbor_fd) { long ret; - struct file *neighbor; - int fput_needed; struct spufs_calls *calls; calls = spufs_calls_get(); @@ -78,11 +76,11 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, return -ENOSYS; if (flags & SPU_CREATE_AFFINITY_SPU) { + struct fd neighbor = fdget(neighbor_fd); ret = -EBADF; - neighbor = fget_light(neighbor_fd, &fput_needed); - if (neighbor) { - ret = calls->create_thread(name, flags, mode, neighbor); - fput_light(neighbor, fput_needed); + if (neighbor.file) { + ret = calls->create_thread(name, flags, mode, neighbor.file); + fdput(neighbor); } } else ret = calls->create_thread(name, flags, mode, NULL); @@ -94,8 +92,7 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) { long ret; - struct file *filp; - int fput_needed; + struct fd arg; struct spufs_calls *calls; calls = spufs_calls_get(); @@ -103,10 +100,10 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) return -ENOSYS; ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = calls->spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); + arg = fdget(fd); + if (arg.file) { + ret = calls->spu_run(arg.file, unpc, ustatus); + fdput(arg); } spufs_calls_put(calls); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6b2ae729de92..6f28da9f4cad 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1184,20 +1184,20 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; struct ucma_context *ctx; - struct file *filp; + struct fd f; struct ucma_file *cur_file; - int ret = 0, fput_needed; + int ret = 0; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; /* Get current fd to protect against it being closed */ - filp = fget_light(cmd.fd, &fput_needed); - if (!filp) + f = fdget(cmd.fd); + if (!f.file) return -ENOENT; /* Validate current fd and prevent destruction of id. */ - ctx = ucma_get_ctx(filp->private_data, cmd.id); + ctx = ucma_get_ctx(f.file->private_data, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; @@ -1231,7 +1231,7 @@ response: ucma_put_ctx(ctx); file_put: - fput_light(filp, fput_needed); + fdput(f); return ret; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 402679bd30a3..0cb0007724a2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -705,9 +705,9 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; - struct file *f = NULL; + struct fd f = {NULL, 0}; struct inode *inode = NULL; - int ret = 0, fput_needed; + int ret = 0; int new_xrcd = 0; if (out_len < sizeof resp) @@ -724,13 +724,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (cmd.fd != -1) { /* search for file descriptor */ - f = fget_light(cmd.fd, &fput_needed); - if (!f) { + f = fdget(cmd.fd); + if (!f.file) { ret = -EBADF; goto err_tree_mutex_unlock; } - inode = f->f_dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; xrcd = find_xrcd(file->device, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ @@ -795,8 +795,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, goto err_copy; } - if (f) - fput_light(f, fput_needed); + if (f.file) + fdput(f); mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); @@ -825,8 +825,8 @@ err: put_uobj_write(&obj->uobject); err_tree_mutex_unlock: - if (f) - fput_light(f, fput_needed); + if (f.file) + fdput(f); mutex_unlock(&file->device->xrcd_tree_mutex); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index acf75c2cf7ef..6f2ce6fa98f8 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -541,17 +541,15 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; - struct file *filp; - int fput_needed; + struct fd f = fdget(fd); - filp = fget_light(fd, &fput_needed); - if (!filp) + if (!f.file) return NULL; - if (filp->f_op != &uverbs_event_fops) + if (f.file->f_op != &uverbs_event_fops) goto out; - ev_file = filp->private_data; + ev_file = f.file->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; @@ -560,7 +558,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) kref_get(&ev_file->ref); out: - fput_light(filp, fput_needed); + fdput(f); return ev_file; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 91bcd97d3061..56097c6d072d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1014,25 +1014,25 @@ static void vfio_group_try_dissolve_container(struct vfio_group *group) static int vfio_group_set_container(struct vfio_group *group, int container_fd) { - struct file *filep; + struct fd f; struct vfio_container *container; struct vfio_iommu_driver *driver; - int ret = 0, fput_needed; + int ret = 0; if (atomic_read(&group->container_users)) return -EINVAL; - filep = fget_light(container_fd, &fput_needed); - if (!filep) + f = fdget(container_fd); + if (!f.file) return -EBADF; /* Sanity check, is this really our fd? */ - if (filep->f_op != &vfio_fops) { - fput_light(filep, fput_needed); + if (f.file->f_op != &vfio_fops) { + fdput(f); return -EINVAL; } - container = filep->private_data; + container = f.file->private_data; WARN_ON(!container); /* fget ensures we don't race vfio_release */ mutex_lock(&container->group_lock); @@ -1054,8 +1054,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) unlock_out: mutex_unlock(&container->group_lock); - fput_light(filep, fput_needed); - + fdput(f); return ret; } diff --git a/drivers/video/msm/mdp.c b/drivers/video/msm/mdp.c index cb2ddf164c98..07c9d8ab2c3b 100644 --- a/drivers/video/msm/mdp.c +++ b/drivers/video/msm/mdp.c @@ -257,19 +257,17 @@ int get_img(struct mdp_img *img, struct fb_info *info, unsigned long *start, unsigned long *len, struct file **filep) { - int put_needed, ret = 0; - struct file *file; - - file = fget_light(img->memory_id, &put_needed); - if (file == NULL) + int ret = 0; + struct fd f = fdget(img->memory_id); + if (f.file == NULL) return -1; - if (MAJOR(file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { + if (MAJOR(f.file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { *start = info->fix.smem_start; *len = info->fix.smem_len; } else ret = -1; - fput_light(file, put_needed); + fdput(f); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3494f2f44167..0a4f0c8bc58f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, u64 *transid, bool readonly, struct btrfs_qgroup_inherit **inherit) { - struct file *src_file; int namelen; int ret = 0; @@ -1421,15 +1420,14 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, ret = btrfs_mksubvol(&file->f_path, name, namelen, NULL, transid, readonly, inherit); } else { + struct fd src = fdget(fd); struct inode *src_inode; - int fput_needed; - src_file = fget_light(fd, &fput_needed); - if (!src_file) { + if (!src.file) { ret = -EINVAL; goto out_drop_write; } - src_inode = src_file->f_path.dentry->d_inode; + src_inode = src.file->f_path.dentry->d_inode; if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); @@ -1439,7 +1437,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, BTRFS_I(src_inode)->root, transid, readonly, inherit); } - fput_light(src_file, fput_needed); + fdput(src); } out_drop_write: mnt_drop_write_file(file); @@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct file *src_file; + struct fd src_file; struct inode *src; struct btrfs_trans_handle *trans; struct btrfs_path *path; @@ -2350,7 +2348,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_key key; u32 nritems; int slot; - int ret, fput_needed; + int ret; u64 len = olen; u64 bs = root->fs_info->sb->s_blocksize; u64 hint_byte; @@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (ret) return ret; - src_file = fget_light(srcfd, &fput_needed); - if (!src_file) { + src_file = fdget(srcfd); + if (!src_file.file) { ret = -EBADF; goto out_drop_write; } ret = -EXDEV; - if (src_file->f_path.mnt != file->f_path.mnt) + if (src_file.file->f_path.mnt != file->f_path.mnt) goto out_fput; - src = src_file->f_dentry->d_inode; + src = src_file.file->f_dentry->d_inode; ret = -EINVAL; if (src == inode) goto out_fput; /* the src must be open for reading */ - if (!(src_file->f_mode & FMODE_READ)) + if (!(src_file.file->f_mode & FMODE_READ)) goto out_fput; /* don't make the dst file partly checksummed */ @@ -2724,7 +2722,7 @@ out_unlock: vfree(buf); btrfs_free_path(path); out_fput: - fput_light(src_file, fput_needed); + fdput(src_file); out_drop_write: mnt_drop_write_file(file); return ret; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index bd2313d106e5..d315c6c5891a 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -107,9 +107,9 @@ static const struct super_operations coda_super_operations = static int get_device_index(struct coda_mount_data *data) { - struct file *file; + struct fd f; struct inode *inode; - int idx, fput_needed; + int idx; if (data == NULL) { printk("coda_read_super: Bad mount data\n"); @@ -121,17 +121,17 @@ static int get_device_index(struct coda_mount_data *data) return -1; } - file = fget_light(data->fd, &fput_needed); - if (!file) + f = fdget(data->fd); + if (!f.file) goto Ebadf; - inode = file->f_path.dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { - fput_light(file, fput_needed); + fdput(f); goto Ebadf; } idx = iminor(inode); - fput_light(file, fput_needed); + fdput(f); if (idx < 0 || idx >= MAX_CODADEVS) { printk("coda_read_super: Bad minor number\n"); diff --git a/fs/compat.c b/fs/compat.c index 1bdb350ea5d3..d72d51e19025 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, struct compat_old_linux_dirent __user *dirent, unsigned int count) { int error; - struct file *file; - int fput_needed; + struct fd f = fdget(fd); struct compat_readdir_callback buf; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; buf.result = 0; buf.dirent = dirent; - error = vfs_readdir(file, compat_fillonedir, &buf); + error = vfs_readdir(f.file, compat_fillonedir, &buf); if (buf.result) error = buf.result; - fput_light(file, fput_needed); + fdput(f); return error; } @@ -949,17 +947,16 @@ efault: asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count) { - struct file * file; + struct fd f; struct compat_linux_dirent __user * lastdirent; struct compat_getdents_callback buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, compat_filldir, &buf); + error = vfs_readdir(f.file, compat_filldir, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(file->f_pos, &lastdirent->d_off)) + if (put_user(f.file->f_pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } @@ -1035,17 +1032,16 @@ efault: asmlinkage long compat_sys_getdents64(unsigned int fd, struct linux_dirent64 __user * dirent, unsigned int count) { - struct file * file; + struct fd f; struct linux_dirent64 __user * lastdirent; struct compat_getdents_callback64 buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, compat_filldir64, &buf); + error = vfs_readdir(f.file, compat_filldir64, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = file->f_pos; + typeof(lastdirent->d_off) d_off = f.file->f_pos; if (__put_user_unaligned(d_off, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ @@ -1152,18 +1148,16 @@ asmlinkage ssize_t compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) { - struct file *file; - int fput_needed; + struct fd f = fdget(fd); ssize_t ret; loff_t pos; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - pos = file->f_pos; - ret = compat_readv(file, vec, vlen, &pos); - file->f_pos = pos; - fput_light(file, fput_needed); + pos = f.file->f_pos; + ret = compat_readv(f.file, vec, vlen, &pos); + f.file->f_pos = pos; + fdput(f); return ret; } @@ -1171,19 +1165,18 @@ asmlinkage ssize_t compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, loff_t pos) { - struct file *file; - int fput_needed; + struct fd f; ssize_t ret; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; ret = -ESPIPE; - if (file->f_mode & FMODE_PREAD) - ret = compat_readv(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PREAD) + ret = compat_readv(f.file, vec, vlen, &pos); + fdput(f); return ret; } @@ -1221,18 +1214,16 @@ asmlinkage ssize_t compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) { - struct file *file; - int fput_needed; + struct fd f = fdget(fd); ssize_t ret; loff_t pos; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - pos = file->f_pos; - ret = compat_writev(file, vec, vlen, &pos); - file->f_pos = pos; - fput_light(file, fput_needed); + pos = f.file->f_pos; + ret = compat_writev(f.file, vec, vlen, &pos); + f.file->f_pos = pos; + fdput(f); return ret; } @@ -1240,19 +1231,18 @@ asmlinkage ssize_t compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, loff_t pos) { - struct file *file; - int fput_needed; + struct fd f; ssize_t ret; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; ret = -ESPIPE; - if (file->f_mode & FMODE_PWRITE) - ret = compat_writev(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PWRITE) + ret = compat_writev(f.file, vec, vlen, &pos); + fdput(f); return ret; } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809..48f1987bec34 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1531,16 +1531,13 @@ static int compat_ioctl_check_table(unsigned int xcmd) asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct file *filp; + struct fd f = fdget(fd); int error = -EBADF; - int fput_needed; - - filp = fget_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; /* RED-PEN how should LSM module know it's handling 32bit? */ - error = security_file_ioctl(filp, cmd, arg); + error = security_file_ioctl(f.file, cmd, arg); if (error) goto out_fput; @@ -1560,30 +1557,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: - error = compat_ioctl_preallocate(filp, compat_ptr(arg)); + error = compat_ioctl_preallocate(f.file, compat_ptr(arg)); goto out_fput; #else case FS_IOC_RESVSP: case FS_IOC_RESVSP64: - error = ioctl_preallocate(filp, compat_ptr(arg)); + error = ioctl_preallocate(f.file, compat_ptr(arg)); goto out_fput; #endif case FIBMAP: case FIGETBSZ: case FIONREAD: - if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) break; /*FALL THROUGH*/ default: - if (filp->f_op && filp->f_op->compat_ioctl) { - error = filp->f_op->compat_ioctl(filp, cmd, arg); + if (f.file->f_op && f.file->f_op->compat_ioctl) { + error = f.file->f_op->compat_ioctl(f.file, cmd, arg); if (error != -ENOIOCTLCMD) goto out_fput; } - if (!filp->f_op || !filp->f_op->unlocked_ioctl) + if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) goto do_ioctl; break; } @@ -1591,7 +1588,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, if (compat_ioctl_check_table(XFORM(cmd))) goto found_handler; - error = do_ioctl_trans(fd, cmd, arg, filp); + error = do_ioctl_trans(fd, cmd, arg, f.file); if (error == -ENOIOCTLCMD) error = -ENOTTY; @@ -1600,9 +1597,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, found_handler: arg = (unsigned long)compat_ptr(arg); do_ioctl: - error = do_vfs_ioctl(filp, fd, cmd, arg); + error = do_vfs_ioctl(f.file, fd, cmd, arg); out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return error; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 567ae72e155d..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1809,8 +1809,8 @@ error_return: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - int error, fput_needed; - struct file *file; + int error; + struct fd f; struct eventpoll *ep; /* The maximum number of event must be greater than zero */ @@ -1818,38 +1818,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, return -EINVAL; /* Verify that the area passed by the user is writeable */ - if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { - error = -EFAULT; - goto error_return; - } + if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) + return -EFAULT; /* Get the "struct file *" for the eventpoll file */ - error = -EBADF; - file = fget_light(epfd, &fput_needed); - if (!file) - goto error_return; + f = fdget(epfd); + if (!f.file) + return -EBADF; /* * We have to check that the file structure underneath the fd * the user passed to us _is_ an eventpoll file. */ error = -EINVAL; - if (!is_file_epoll(file)) + if (!is_file_epoll(f.file)) goto error_fput; /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ - ep = file->private_data; + ep = f.file->private_data; /* Time to fish for events ... */ error = ep_poll(ep, events, maxevents, timeout); error_fput: - fput_light(file, fput_needed); -error_return: - + fdput(f); return error; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 39646f224514..5439d6a56e99 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -233,8 +233,8 @@ group_extend_out: case EXT4_IOC_MOVE_EXT: { struct move_extent me; - struct file *donor_filp; - int err, fput_needed; + struct fd donor; + int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) @@ -245,11 +245,11 @@ group_extend_out: return -EFAULT; me.moved_len = 0; - donor_filp = fget_light(me.donor_fd, &fput_needed); - if (!donor_filp) + donor = fdget(me.donor_fd); + if (!donor.file) return -EBADF; - if (!(donor_filp->f_mode & FMODE_WRITE)) { + if (!(donor.file->f_mode & FMODE_WRITE)) { err = -EBADF; goto mext_out; } @@ -266,7 +266,7 @@ group_extend_out: if (err) goto mext_out; - err = ext4_move_extents(filp, donor_filp, me.orig_start, + err = ext4_move_extents(filp, donor.file, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); @@ -274,7 +274,7 @@ group_extend_out: &me, sizeof(me))) err = -EFAULT; mext_out: - fput_light(donor_filp, fput_needed); + fdput(donor); return err; } diff --git a/fs/fcntl.c b/fs/fcntl.c index 40a5bfb72cca..91af39a33af7 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -348,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd) SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { - struct file *filp; - int fput_needed; + struct fd f = fdget_raw(fd); long err = -EBADF; - filp = fget_raw_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; - if (unlikely(filp->f_mode & FMODE_PATH)) { + if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } - err = security_file_fcntl(filp, cmd, arg); + err = security_file_fcntl(f.file, cmd, arg); if (!err) - err = do_fcntl(fd, cmd, arg, filp); + err = do_fcntl(fd, cmd, arg, f.file); out1: - fput_light(filp, fput_needed); + fdput(f); out: return err; } @@ -375,38 +373,36 @@ out: SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { - struct file * filp; + struct fd f = fdget_raw(fd); long err = -EBADF; - int fput_needed; - filp = fget_raw_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; - if (unlikely(filp->f_mode & FMODE_PATH)) { + if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } - err = security_file_fcntl(filp, cmd, arg); + err = security_file_fcntl(f.file, cmd, arg); if (err) goto out1; switch (cmd) { case F_GETLK64: - err = fcntl_getlk64(filp, (struct flock64 __user *) arg); + err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); break; case F_SETLK64: case F_SETLKW64: - err = fcntl_setlk64(fd, filp, cmd, + err = fcntl_setlk64(fd, f.file, cmd, (struct flock64 __user *) arg); break; default: - err = do_fcntl(fd, cmd, arg, filp); + err = do_fcntl(fd, cmd, arg, f.file); break; } out1: - fput_light(filp, fput_needed); + fdput(f); out: return err; } diff --git a/fs/fhandle.c b/fs/fhandle.c index a48e4a139be1..f775bfdd6e4a 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, static struct vfsmount *get_vfsmount_from_fd(int fd) { - struct path path; + struct vfsmount *mnt; if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); - path = fs->pwd; - mntget(path.mnt); + mnt = mntget(fs->pwd.mnt); spin_unlock(&fs->lock); } else { - int fput_needed; - struct file *file = fget_light(fd, &fput_needed); - if (!file) + struct fd f = fdget(fd); + if (!f.file) return ERR_PTR(-EBADF); - path = file->f_path; - mntget(path.mnt); - fput_light(file, fput_needed); + mnt = mntget(f.file->f_path.mnt); + fdput(f); } - return path.mnt; + return mnt; } static int vfs_dentry_acceptable(void *context, struct dentry *dentry) diff --git a/fs/ioctl.c b/fs/ioctl.c index 29167bebe874..3bdad6d1f268 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { - struct file *filp; - int error = -EBADF; - int fput_needed; - - filp = fget_light(fd, &fput_needed); - if (!filp) - goto out; - - error = security_file_ioctl(filp, cmd, arg); - if (error) - goto out_fput; - - error = do_vfs_ioctl(filp, fd, cmd, arg); - out_fput: - fput_light(filp, fput_needed); - out: + int error; + struct fd f = fdget(fd); + + if (!f.file) + return -EBADF; + error = security_file_ioctl(f.file, cmd, arg); + if (!error) + error = do_vfs_ioctl(f.file, fd, cmd, arg); + fdput(f); return error; } diff --git a/fs/locks.c b/fs/locks.c index 7e81bfc75164..abc7dc6c490b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait); */ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { - struct file *filp; - int fput_needed; + struct fd f = fdget(fd); struct file_lock *lock; int can_sleep, unlock; int error; error = -EBADF; - filp = fget_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; can_sleep = !(cmd & LOCK_NB); @@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) unlock = (cmd == LOCK_UN); if (!unlock && !(cmd & LOCK_MAND) && - !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) + !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; - error = flock_make_lock(filp, &lock, cmd); + error = flock_make_lock(f.file, &lock, cmd); if (error) goto out_putf; if (can_sleep) lock->fl_flags |= FL_SLEEP; - error = security_file_lock(filp, lock->fl_type); + error = security_file_lock(f.file, lock->fl_type); if (error) goto out_free; - if (filp->f_op && filp->f_op->flock) - error = filp->f_op->flock(filp, + if (f.file->f_op && f.file->f_op->flock) + error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); else - error = flock_lock_file_wait(filp, lock); + error = flock_lock_file_wait(f.file, lock); out_free: locks_free_lock(lock); out_putf: - fput_light(filp, fput_needed); + fdput(f); out: return error; } diff --git a/fs/namei.c b/fs/namei.c index c5b85b3523c0..e1c7072c7afa 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd, struct file **fp) { int retval = 0; - int fput_needed; - struct file *file; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED; @@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags, get_fs_pwd(current->fs, &nd->path); } } else { + struct fd f = fdget_raw(dfd); struct dentry *dentry; - file = fget_raw_light(dfd, &fput_needed); - retval = -EBADF; - if (!file) - goto out_fail; + if (!f.file) + return -EBADF; - dentry = file->f_path.dentry; + dentry = f.file->f_path.dentry; if (*name) { - retval = -ENOTDIR; - if (!S_ISDIR(dentry->d_inode->i_mode)) - goto fput_fail; + if (!S_ISDIR(dentry->d_inode->i_mode)) { + fdput(f); + return -ENOTDIR; + } retval = inode_permission(dentry->d_inode, MAY_EXEC); - if (retval) - goto fput_fail; + if (retval) { + fdput(f); + return retval; + } } - nd->path = file->f_path; + nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (fput_needed) - *fp = file; + if (f.need_put) + *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); lock_rcu_walk(); } else { - path_get(&file->f_path); - fput_light(file, fput_needed); + path_get(&nd->path); + fdput(f); } } nd->inode = nd->path.dentry->d_inode; return 0; - -fput_fail: - fput_light(file, fput_needed); -out_fail: - return retval; } static inline int lookup_last(struct nameidata *nd, struct path *path) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ea48693940f1..721d692fa8d4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -451,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename, dfd, filename, flags); if (filename == NULL) { - struct file *file; - int fput_needed; + struct fd f = fdget(dfd); ret = -EBADF; - file = fget_light(dfd, &fput_needed); - if (!file) + if (!f.file) goto out; ret = -ENOTDIR; if ((flags & FAN_MARK_ONLYDIR) && - !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { - fput_light(file, fput_needed); + !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { + fdput(f); goto out; } - *path = file->f_path; + *path = f.file->f_path; path_get(path); - fput_light(file, fput_needed); + fdput(f); } else { unsigned int lookup_flags = 0; @@ -748,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, struct inode *inode = NULL; struct vfsmount *mnt = NULL; struct fsnotify_group *group; - struct file *filp; + struct fd f; struct path path; - int ret, fput_needed; + int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", __func__, fanotify_fd, flags, dfd, pathname, mask); @@ -784,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, #endif return -EINVAL; - filp = fget_light(fanotify_fd, &fput_needed); - if (unlikely(!filp)) + f = fdget(fanotify_fd); + if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an fanotify instance */ ret = -EINVAL; - if (unlikely(filp->f_op != &fanotify_fops)) + if (unlikely(f.file->f_op != &fanotify_fops)) goto fput_and_out; - group = filp->private_data; + group = f.file->private_data; /* * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not @@ -839,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, path_put(&path); fput_and_out: - fput_light(filp, fput_needed); + fdput(f); return ret; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc8985c..c311dda054a3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; - struct file *filp; - int ret, fput_needed; + struct fd f; + int ret; unsigned flags = 0; - filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) + f = fdget(fd); + if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an inotify instance */ - if (unlikely(filp->f_op != &inotify_fops)) { + if (unlikely(f.file->f_op != &inotify_fops)) { ret = -EINVAL; goto fput_and_out; } @@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, /* inode held in place by reference to path; group by fget on fd */ inode = path.dentry->d_inode; - group = filp->private_data; + group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); path_put(&path); fput_and_out: - fput_light(filp, fput_needed); + fdput(f); return ret; } @@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct fsnotify_group *group; struct inotify_inode_mark *i_mark; - struct file *filp; - int ret = 0, fput_needed; + struct fd f; + int ret = 0; - filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) + f = fdget(fd); + if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an inotify instance */ ret = -EINVAL; - if (unlikely(filp->f_op != &inotify_fops)) + if (unlikely(f.file->f_op != &inotify_fops)) goto out; - group = filp->private_data; + group = f.file->private_data; ret = -EINVAL; i_mark = inotify_idr_find(group, wd); @@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) fsnotify_put_mark(&i_mark->fsn_mark); out: - fput_light(filp, fput_needed); + fdput(f); return ret; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 61c28ae266f5..f7c648d7d6bf 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1746,11 +1746,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, long fd; int sectsize; char *p = (char *)page; - struct file *filp = NULL; - struct inode *inode = NULL; + struct fd f; + struct inode *inode; ssize_t ret = -EINVAL; int live_threshold; - int fput_needed; if (reg->hr_bdev) goto out; @@ -1767,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (fd < 0 || fd >= INT_MAX) goto out; - filp = fget_light(fd, &fput_needed); - if (filp == NULL) + f = fdget(fd); + if (f.file == NULL) goto out; if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || reg->hr_block_bytes == 0) - goto out; + goto out2; - inode = igrab(filp->f_mapping->host); + inode = igrab(f.file->f_mapping->host); if (inode == NULL) - goto out; + goto out2; if (!S_ISBLK(inode->i_mode)) - goto out; + goto out3; - reg->hr_bdev = I_BDEV(filp->f_mapping->host); + reg->hr_bdev = I_BDEV(f.file->f_mapping->host); ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); if (ret) { reg->hr_bdev = NULL; - goto out; + goto out3; } inode = NULL; @@ -1798,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, "blocksize %u incorrect for device, expected %d", reg->hr_block_bytes, sectsize); ret = -EINVAL; - goto out; + goto out3; } o2hb_init_region_params(reg); @@ -1812,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, ret = o2hb_map_slot_data(reg); if (ret) { mlog_errno(ret); - goto out; + goto out3; } ret = o2hb_populate_slot_data(reg); if (ret) { mlog_errno(ret); - goto out; + goto out3; } INIT_DELAYED_WORK(®->hr_write_timeout_work, o2hb_write_timeout); @@ -1848,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (IS_ERR(hb_task)) { ret = PTR_ERR(hb_task); mlog_errno(ret); - goto out; + goto out3; } spin_lock(&o2hb_live_lock); @@ -1864,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (reg->hr_aborted_start) { ret = -EIO; - goto out; + goto out3; } /* Ok, we were woken. Make sure it wasn't by drop_item() */ @@ -1883,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", config_item_name(®->hr_item), reg->hr_dev_name); +out3: + iput(inode); +out2: + fdput(f); out: - if (filp) - fput_light(filp, fput_needed); - if (inode) - iput(inode); if (ret < 0) { if (reg->hr_bdev) { blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); diff --git a/fs/open.c b/fs/open.c index 3c741eae6b99..85603262d8db 100644 --- a/fs/open.c +++ b/fs/open.c @@ -134,25 +134,25 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; - struct file *file; - int error, fput_needed; + struct fd f; + int error; error = -EINVAL; if (length < 0) goto out; error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) goto out; /* explicitly opened as large or we are on 64-bit box */ - if (file->f_flags & O_LARGEFILE) + if (f.file->f_flags & O_LARGEFILE) small = 0; - dentry = file->f_path.dentry; + dentry = f.file->f_path.dentry; inode = dentry->d_inode; error = -EINVAL; - if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) + if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) goto out_putf; error = -EINVAL; @@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) goto out_putf; sb_start_write(inode->i_sb); - error = locks_verify_truncate(inode, file, length); + error = locks_verify_truncate(inode, f.file, length); if (!error) - error = security_path_truncate(&file->f_path); + error = security_path_truncate(&f.file->f_path); if (!error) - error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); + error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: - fput_light(file, fput_needed); + fdput(f); out: return error; } @@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) { - struct file *file; - int error = -EBADF, fput_needed; + struct fd f = fdget(fd); + int error = -EBADF; - file = fget_light(fd, &fput_needed); - if (file) { - error = do_fallocate(file, mode, offset, len); - fput_light(file, fput_needed); + if (f.file) { + error = do_fallocate(f.file, mode, offset, len); + fdput(f); } - return error; } @@ -400,16 +398,15 @@ out: SYSCALL_DEFINE1(fchdir, unsigned int, fd) { - struct file *file; + struct fd f = fdget_raw(fd); struct inode *inode; - int error, fput_needed; + int error = -EBADF; error = -EBADF; - file = fget_raw_light(fd, &fput_needed); - if (!file) + if (!f.file) goto out; - inode = file->f_path.dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) @@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); if (!error) - set_fs_pwd(current->fs, &file->f_path); + set_fs_pwd(current->fs, &f.file->f_path); out_putf: - fput_light(file, fput_needed); + fdput(f); out: return error; } @@ -582,21 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { - struct file *file; - int error = -EBADF, fput_needed; + struct fd f = fdget(fd); + int error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) goto out; - error = mnt_want_write_file(file); + error = mnt_want_write_file(f.file); if (error) goto out_fput; - audit_inode(NULL, file->f_path.dentry); - error = chown_common(&file->f_path, user, group); - mnt_drop_write_file(file); + audit_inode(NULL, f.file->f_path.dentry); + error = chown_common(&f.file->f_path, user, group); + mnt_drop_write_file(f.file); out_fput: - fput_light(file, fput_needed); + fdput(f); out: return error; } diff --git a/fs/read_write.c b/fs/read_write.c index 1adfb691e4f1..28b38279a219 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek); SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) { off_t retval; - struct file * file; - int fput_needed; - - retval = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) - goto bad; + struct fd f = fdget(fd); + if (!f.file) + return -EBADF; retval = -EINVAL; if (origin <= SEEK_MAX) { - loff_t res = vfs_llseek(file, offset, origin); + loff_t res = vfs_llseek(f.file, offset, origin); retval = res; if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } - fput_light(file, fput_needed); -bad: + fdput(f); return retval; } @@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned int, origin) { int retval; - struct file * file; + struct fd f = fdget(fd); loff_t offset; - int fput_needed; - retval = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) - goto bad; + if (!f.file) + return -EBADF; retval = -EINVAL; if (origin > SEEK_MAX) goto out_putf; - offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, + offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, origin); retval = (int)offset; @@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, retval = 0; } out_putf: - fput_light(file, fput_needed); -bad: + fdput(f); return retval; } #endif @@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos) SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_read(file, buf, count, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_read(f.file, buf, count, &pos); + file_pos_write(f.file, pos); + fdput(f); } - return ret; } SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_write(file, buf, count, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_write(f.file, buf, count, &pos); + file_pos_write(f.file, pos); + fdput(f); } return ret; @@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, size_t count, loff_t pos) { - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PREAD) - ret = vfs_read(file, buf, count, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PREAD) + ret = vfs_read(f.file, buf, count, &pos); + fdput(f); } return ret; @@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64); SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, size_t count, loff_t pos) { - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PWRITE) - ret = vfs_write(file, buf, count, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PWRITE) + ret = vfs_write(f.file, buf, count, &pos); + fdput(f); } return ret; @@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev); SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_readv(file, vec, vlen, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_readv(f.file, vec, vlen, &pos); + file_pos_write(f.file, pos); + fdput(f); } if (ret > 0) @@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_writev(file, vec, vlen, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_writev(f.file, vec, vlen, &pos); + file_pos_write(f.file, pos); + fdput(f); } if (ret > 0) @@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) { loff_t pos = pos_from_hilo(pos_h, pos_l); - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PREAD) - ret = vfs_readv(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PREAD) + ret = vfs_readv(f.file, vec, vlen, &pos); + fdput(f); } if (ret > 0) @@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) { loff_t pos = pos_from_hilo(pos_h, pos_l); - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PWRITE) - ret = vfs_writev(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PWRITE) + ret = vfs_writev(f.file, vec, vlen, &pos); + fdput(f); } if (ret > 0) @@ -887,28 +865,28 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { - struct file * in_file, * out_file; - struct inode * in_inode, * out_inode; + struct fd in, out; + struct inode *in_inode, *out_inode; loff_t pos; ssize_t retval; - int fput_needed_in, fput_needed_out, fl; + int fl; /* * Get input file, and verify that it is ok.. */ retval = -EBADF; - in_file = fget_light(in_fd, &fput_needed_in); - if (!in_file) + in = fdget(in_fd); + if (!in.file) goto out; - if (!(in_file->f_mode & FMODE_READ)) + if (!(in.file->f_mode & FMODE_READ)) goto fput_in; retval = -ESPIPE; if (!ppos) - ppos = &in_file->f_pos; + ppos = &in.file->f_pos; else - if (!(in_file->f_mode & FMODE_PREAD)) + if (!(in.file->f_mode & FMODE_PREAD)) goto fput_in; - retval = rw_verify_area(READ, in_file, ppos, count); + retval = rw_verify_area(READ, in.file, ppos, count); if (retval < 0) goto fput_in; count = retval; @@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, * Get output file, and verify that it is ok.. */ retval = -EBADF; - out_file = fget_light(out_fd, &fput_needed_out); - if (!out_file) + out = fdget(out_fd); + if (!out.file) goto fput_in; - if (!(out_file->f_mode & FMODE_WRITE)) + if (!(out.file->f_mode & FMODE_WRITE)) goto fput_out; retval = -EINVAL; - in_inode = in_file->f_path.dentry->d_inode; - out_inode = out_file->f_path.dentry->d_inode; - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); + in_inode = in.file->f_path.dentry->d_inode; + out_inode = out.file->f_path.dentry->d_inode; + retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); if (retval < 0) goto fput_out; count = retval; @@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, * and the application is arguably buggy if it doesn't expect * EAGAIN on a non-blocking file descriptor. */ - if (in_file->f_flags & O_NONBLOCK) + if (in.file->f_flags & O_NONBLOCK) fl = SPLICE_F_NONBLOCK; #endif - retval = do_splice_direct(in_file, ppos, out_file, count, fl); + retval = do_splice_direct(in.file, ppos, out.file, count, fl); if (retval > 0) { add_rchar(current, retval); @@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, retval = -EOVERFLOW; fput_out: - fput_light(out_file, fput_needed_out); + fdput(out); fput_in: - fput_light(in_file, fput_needed_in); + fdput(in); out: return retval; } diff --git a/fs/readdir.c b/fs/readdir.c index 39e3370d79cf..5e69ef533b77 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; - struct file * file; + struct fd f = fdget(fd); struct readdir_callback buf; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; buf.result = 0; buf.dirent = dirent; - error = vfs_readdir(file, fillonedir, &buf); + error = vfs_readdir(f.file, fillonedir, &buf); if (buf.result) error = buf.result; - fput_light(file, fput_needed); + fdput(f); return error; } @@ -191,17 +189,16 @@ efault: SYSCALL_DEFINE3(getdents, unsigned int, fd, struct linux_dirent __user *, dirent, unsigned int, count) { - struct file * file; + struct fd f; struct linux_dirent __user * lastdirent; struct getdents_callback buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, filldir, &buf); + error = vfs_readdir(f.file, filldir, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(file->f_pos, &lastdirent->d_off)) + if (put_user(f.file->f_pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } @@ -272,17 +269,16 @@ efault: SYSCALL_DEFINE3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count) { - struct file * file; + struct fd f; struct linux_dirent64 __user * lastdirent; struct getdents_callback64 buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, filldir64, &buf); + error = vfs_readdir(f.file, filldir64, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = file->f_pos; + typeof(lastdirent->d_off) d_off = f.file->f_pos; if (__put_user(d_off, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } diff --git a/fs/select.c b/fs/select.c index ffdd16d6e691..2ef72d965036 100644 --- a/fs/select.c +++ b/fs/select.c @@ -428,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) for (i = 0; i < n; ++rinp, ++routp, ++rexp) { unsigned long in, out, ex, all_bits, bit = 1, mask, j; unsigned long res_in = 0, res_out = 0, res_ex = 0; - const struct file_operations *f_op = NULL; - struct file *file = NULL; in = *inp++; out = *outp++; ex = *exp++; all_bits = in | out | ex; @@ -439,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) } for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { - int fput_needed; + struct fd f; if (i >= n) break; if (!(bit & all_bits)) continue; - file = fget_light(i, &fput_needed); - if (file) { - f_op = file->f_op; + f = fdget(i); + if (f.file) { + const struct file_operations *f_op; + f_op = f.file->f_op; mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) { wait_key_set(wait, in, out, bit); - mask = (*f_op->poll)(file, wait); + mask = (*f_op->poll)(f.file, wait); } - fput_light(file, fput_needed); + fdput(f); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; @@ -725,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) mask = 0; fd = pollfd->fd; if (fd >= 0) { - int fput_needed; - struct file * file; - - file = fget_light(fd, &fput_needed); + struct fd f = fdget(fd); mask = POLLNVAL; - if (file != NULL) { + if (f.file) { mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) { + if (f.file->f_op && f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; - mask = file->f_op->poll(file, pwait); + mask = f.file->f_op->poll(f.file, pwait); } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; - fput_light(file, fput_needed); + fdput(f); } } pollfd->revents = mask; diff --git a/fs/signalfd.c b/fs/signalfd.c index 9f35a37173de..8bee4e570911 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, if (ufd < 0) kfree(ctx); } else { - int fput_needed; - struct file *file = fget_light(ufd, &fput_needed); - if (!file) + struct fd f = fdget(ufd); + if (!f.file) return -EBADF; - ctx = file->private_data; - if (file->f_op != &signalfd_fops) { - fput_light(file, fput_needed); + ctx = f.file->private_data; + if (f.file->f_op != &signalfd_fops) { + fdput(f); return -EINVAL; } spin_lock_irq(¤t->sighand->siglock); @@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, spin_unlock_irq(¤t->sighand->siglock); wake_up(¤t->sighand->signalfd_wqh); - fput_light(file, fput_needed); + fdput(f); } return ufd; diff --git a/fs/splice.c b/fs/splice.c index 41514dd89462..13e5b4776e7a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, unsigned long, nr_segs, unsigned int, flags) { - struct file *file; + struct fd f; long error; - int fput; if (unlikely(nr_segs > UIO_MAXIOV)) return -EINVAL; @@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, return 0; error = -EBADF; - file = fget_light(fd, &fput); - if (file) { - if (file->f_mode & FMODE_WRITE) - error = vmsplice_to_pipe(file, iov, nr_segs, flags); - else if (file->f_mode & FMODE_READ) - error = vmsplice_to_user(file, iov, nr_segs, flags); - - fput_light(file, fput); + f = fdget(fd); + if (f.file) { + if (f.file->f_mode & FMODE_WRITE) + error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); + else if (f.file->f_mode & FMODE_READ) + error = vmsplice_to_user(f.file, iov, nr_segs, flags); + + fdput(f); } return error; @@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) { + struct fd in, out; long error; - struct file *in, *out; - int fput_in, fput_out; if (unlikely(!len)) return 0; error = -EBADF; - in = fget_light(fd_in, &fput_in); - if (in) { - if (in->f_mode & FMODE_READ) { - out = fget_light(fd_out, &fput_out); - if (out) { - if (out->f_mode & FMODE_WRITE) - error = do_splice(in, off_in, - out, off_out, + in = fdget(fd_in); + if (in.file) { + if (in.file->f_mode & FMODE_READ) { + out = fdget(fd_out); + if (out.file) { + if (out.file->f_mode & FMODE_WRITE) + error = do_splice(in.file, off_in, + out.file, off_out, len, flags); - fput_light(out, fput_out); + fdput(out); } } - - fput_light(in, fput_in); + fdput(in); } - return error; } @@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len, SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct file *in; - int error, fput_in; + struct fd in; + int error; if (unlikely(!len)) return 0; error = -EBADF; - in = fget_light(fdin, &fput_in); - if (in) { - if (in->f_mode & FMODE_READ) { - int fput_out; - struct file *out = fget_light(fdout, &fput_out); - - if (out) { - if (out->f_mode & FMODE_WRITE) - error = do_tee(in, out, len, flags); - fput_light(out, fput_out); + in = fdget(fdin); + if (in.file) { + if (in.file->f_mode & FMODE_READ) { + struct fd out = fdget(fdout); + if (out.file) { + if (out.file->f_mode & FMODE_WRITE) + error = do_tee(in.file, out.file, + len, flags); + fdput(out); } } - fput_light(in, fput_in); + fdput(in); } return error; diff --git a/fs/stat.c b/fs/stat.c index 40780229a032..ee18fa122ae0 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) { - int fput_needed; - struct file *f = fget_raw_light(fd, &fput_needed); + struct fd f = fdget_raw(fd); int error = -EBADF; - if (f) { - error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); - fput_light(f, fput_needed); + if (f.file) { + error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, + stat); + fdput(f); } return error; } diff --git a/fs/statfs.c b/fs/statfs.c index 95ad5c0e586c..f8e832e6f0a2 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) int fd_statfs(int fd, struct kstatfs *st) { - int fput_needed; - struct file *file = fget_light(fd, &fput_needed); + struct fd f = fdget(fd); int error = -EBADF; - if (file) { - error = vfs_statfs(&file->f_path, st); - fput_light(file, fput_needed); + if (f.file) { + error = vfs_statfs(&f.file->f_path, st); + fdput(f); } return error; } diff --git a/fs/sync.c b/fs/sync.c index eb8722dc556f..14eefeb44636 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -148,21 +148,19 @@ void emergency_sync(void) */ SYSCALL_DEFINE1(syncfs, int, fd) { - struct file *file; + struct fd f = fdget(fd); struct super_block *sb; int ret; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - sb = file->f_dentry->d_sb; + sb = f.file->f_dentry->d_sb; down_read(&sb->s_umount); ret = sync_filesystem(sb); up_read(&sb->s_umount); - fput_light(file, fput_needed); + fdput(f); return ret; } @@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync); static int do_fsync(unsigned int fd, int datasync) { - struct file *file; + struct fd f = fdget(fd); int ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - ret = vfs_fsync(file, datasync); - fput_light(file, fput_needed); + if (f.file) { + ret = vfs_fsync(f.file, datasync); + fdput(f); } return ret; } @@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, unsigned int flags) { int ret; - struct file *file; + struct fd f; struct address_space *mapping; loff_t endbyte; /* inclusive */ - int fput_needed; umode_t i_mode; ret = -EINVAL; @@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, endbyte--; /* inclusive */ ret = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) goto out; - i_mode = file->f_path.dentry->d_inode->i_mode; + i_mode = f.file->f_path.dentry->d_inode->i_mode; ret = -ESPIPE; if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && !S_ISLNK(i_mode)) goto out_put; - mapping = file->f_mapping; + mapping = f.file->f_mapping; if (!mapping) { ret = -EINVAL; goto out_put; @@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, ret = filemap_fdatawait_range(mapping, offset, endbyte); out_put: - fput_light(file, fput_needed); + fdput(f); out: return ret; } diff --git a/fs/timerfd.c b/fs/timerfd.c index dd91e9420c9e..d03822bbf190 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = { .llseek = noop_llseek, }; -static struct file *timerfd_fget(int fd, int *fput_needed) +static int timerfd_fget(int fd, struct fd *p) { - struct file *file; - - file = fget_light(fd, fput_needed); - if (!file) - return ERR_PTR(-EBADF); - if (file->f_op != &timerfd_fops) { - fput_light(file, *fput_needed); - return ERR_PTR(-EINVAL); + struct fd f = fdget(fd); + if (!f.file) + return -EBADF; + if (f.file->f_op != &timerfd_fops) { + fdput(f); + return -EINVAL; } - - return file; + *p = f; + return 0; } SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) @@ -284,10 +282,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct itimerspec __user *, utmr, struct itimerspec __user *, otmr) { - struct file *file; + struct fd f; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; - int ret, fput_needed; + int ret; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, !timespec_valid(&ktmr.it_interval)) return -EINVAL; - file = timerfd_fget(ufd, &fput_needed); - if (IS_ERR(file)) - return PTR_ERR(file); - ctx = file->private_data; + ret = timerfd_fget(ufd, &f); + if (ret) + return ret; + ctx = f.file->private_data; timerfd_setup_cancel(ctx, flags); @@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); - fput_light(file, fput_needed); + fdput(f); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; @@ -343,15 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) { - struct file *file; + struct fd f; struct timerfd_ctx *ctx; struct itimerspec kotmr; - int fput_needed; - - file = timerfd_fget(ufd, &fput_needed); - if (IS_ERR(file)) - return PTR_ERR(file); - ctx = file->private_data; + int ret = timerfd_fget(ufd, &f); + if (ret) + return ret; + ctx = f.file->private_data; spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv.tv64) { @@ -363,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); kotmr.it_interval = ktime_to_timespec(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); - fput_light(file, fput_needed); + fdput(f); return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; } diff --git a/fs/utimes.c b/fs/utimes.c index fa4dbe451e27..bb0696a41735 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, goto out; if (filename == NULL && dfd != AT_FDCWD) { - int fput_needed; - struct file *file; + struct fd f; if (flags & AT_SYMLINK_NOFOLLOW) goto out; - file = fget_light(dfd, &fput_needed); + f = fdget(dfd); error = -EBADF; - if (!file) + if (!f.file) goto out; - error = utimes_common(&file->f_path, times); - fput_light(file, fput_needed); + error = utimes_common(&f.file->f_path, times); + fdput(f); } else { struct path path; int lookup_flags = 0; diff --git a/fs/xattr.c b/fs/xattr.c index 4d45b7189e7e..14a7e2544fe3 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -399,22 +399,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); struct dentry *dentry; int error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - dentry = f->f_path.dentry; + dentry = f.file->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write_file(f); + error = mnt_want_write_file(f.file); if (!error) { error = setxattr(dentry, name, value, size, flags); - mnt_drop_write_file(f); + mnt_drop_write_file(f.file); } - fput_light(f, fput_needed); + fdput(f); return error; } @@ -495,16 +493,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); ssize_t error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - audit_inode(NULL, f->f_path.dentry); - error = getxattr(f->f_path.dentry, name, value, size); - fput_light(f, fput_needed); + audit_inode(NULL, f.file->f_path.dentry); + error = getxattr(f.file->f_path.dentry, name, value, size); + fdput(f); return error; } @@ -576,16 +572,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); ssize_t error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - audit_inode(NULL, f->f_path.dentry); - error = listxattr(f->f_path.dentry, list, size); - fput_light(f, fput_needed); + audit_inode(NULL, f.file->f_path.dentry); + error = listxattr(f.file->f_path.dentry, list, size); + fdput(f); return error; } @@ -645,22 +639,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); struct dentry *dentry; int error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - dentry = f->f_path.dentry; + dentry = f.file->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write_file(f); + error = mnt_want_write_file(f.file); if (!error) { error = removexattr(dentry, name); - mnt_drop_write_file(f); + mnt_drop_write_file(f.file); } - fput_light(f, fput_needed); + fdput(f); return error; } diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e6cdf224d7ea..b9b8646e62db 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -48,44 +48,44 @@ xfs_swapext( xfs_swapext_t *sxp) { xfs_inode_t *ip, *tip; - struct file *file, *tmp_file; - int error = 0, fput_needed, fput_needed_tmp; + struct fd f, tmp; + int error = 0; /* Pull information for the target fd */ - file = fget_light((int)sxp->sx_fdtarget, &fput_needed); - if (!file) { + f = fdget((int)sxp->sx_fdtarget); + if (!f.file) { error = XFS_ERROR(EINVAL); goto out; } - if (!(file->f_mode & FMODE_WRITE) || - !(file->f_mode & FMODE_READ) || - (file->f_flags & O_APPEND)) { + if (!(f.file->f_mode & FMODE_WRITE) || + !(f.file->f_mode & FMODE_READ) || + (f.file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_file; } - tmp_file = fget_light((int)sxp->sx_fdtmp, &fput_needed_tmp); - if (!tmp_file) { + tmp = fdget((int)sxp->sx_fdtmp); + if (!tmp.file) { error = XFS_ERROR(EINVAL); goto out_put_file; } - if (!(tmp_file->f_mode & FMODE_WRITE) || - !(tmp_file->f_mode & FMODE_READ) || - (tmp_file->f_flags & O_APPEND)) { + if (!(tmp.file->f_mode & FMODE_WRITE) || + !(tmp.file->f_mode & FMODE_READ) || + (tmp.file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_tmp_file; } - if (IS_SWAPFILE(file->f_path.dentry->d_inode) || - IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { + if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || + IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { error = XFS_ERROR(EINVAL); goto out_put_tmp_file; } - ip = XFS_I(file->f_path.dentry->d_inode); - tip = XFS_I(tmp_file->f_path.dentry->d_inode); + ip = XFS_I(f.file->f_path.dentry->d_inode); + tip = XFS_I(tmp.file->f_path.dentry->d_inode); if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); @@ -105,9 +105,9 @@ xfs_swapext( error = xfs_swap_extents(ip, tip, sxp); out_put_tmp_file: - fput_light(tmp_file, fput_needed_tmp); + fdput(tmp); out_put_file: - fput_light(file, fput_needed); + fdput(f); out: return error; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 21483eac402d..8305f2ac6773 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -70,16 +70,16 @@ xfs_find_handle( int hsize; xfs_handle_t handle; struct inode *inode; - struct file *file = NULL; + struct fd f; struct path path; - int error, fput_needed; + int error; struct xfs_inode *ip; if (cmd == XFS_IOC_FD_TO_HANDLE) { - file = fget_light(hreq->fd, &fput_needed); - if (!file) + f = fdget(hreq->fd); + if (!f.file) return -EBADF; - inode = file->f_path.dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; } else { error = user_lpath((const char __user *)hreq->path, &path); if (error) @@ -134,7 +134,7 @@ xfs_find_handle( out_put: if (cmd == XFS_IOC_FD_TO_HANDLE) - fput_light(file, fput_needed); + fdput(f); else path_put(&path); return error; diff --git a/include/linux/file.h b/include/linux/file.h index c38bfbff4647..cbacf4faf447 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -47,6 +47,9 @@ static inline struct fd fdget(unsigned int fd) return (struct fd){f,b}; } +extern struct file *fget_raw(unsigned int fd); +extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); + static inline struct fd fdget_raw(unsigned int fd) { int b; @@ -54,8 +57,6 @@ static inline struct fd fdget_raw(unsigned int fd) return (struct fd){f,b}; } -extern struct file *fget_raw(unsigned int fd); -extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 5db1b69500fd..6d255e535d03 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -944,7 +944,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, size_t, msg_len, unsigned int, msg_prio, const struct timespec __user *, u_abs_timeout) { - struct file *filp; + struct fd f; struct inode *inode; struct ext_wait_queue wait; struct ext_wait_queue *receiver; @@ -953,7 +953,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, ktime_t expires, *timeout = NULL; struct timespec ts; struct posix_msg_tree_node *new_leaf = NULL; - int ret = 0, fput_needed; + int ret = 0; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -967,21 +967,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL); - filp = fget_light(mqdes, &fput_needed); - if (unlikely(!filp)) { + f = fdget(mqdes); + if (unlikely(!f.file)) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } info = MQUEUE_I(inode); - audit_inode(NULL, filp->f_path.dentry); + audit_inode(NULL, f.file->f_path.dentry); - if (unlikely(!(filp->f_mode & FMODE_WRITE))) { + if (unlikely(!(f.file->f_mode & FMODE_WRITE))) { ret = -EBADF; goto out_fput; } @@ -1023,7 +1023,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, } if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { - if (filp->f_flags & O_NONBLOCK) { + if (f.file->f_flags & O_NONBLOCK) { ret = -EAGAIN; } else { wait.task = current; @@ -1056,7 +1056,7 @@ out_free: if (ret) free_msg(msg_ptr); out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return ret; } @@ -1067,14 +1067,13 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, { ssize_t ret; struct msg_msg *msg_ptr; - struct file *filp; + struct fd f; struct inode *inode; struct mqueue_inode_info *info; struct ext_wait_queue wait; ktime_t expires, *timeout = NULL; struct timespec ts; struct posix_msg_tree_node *new_leaf = NULL; - int fput_needed; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -1085,21 +1084,21 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL); - filp = fget_light(mqdes, &fput_needed); - if (unlikely(!filp)) { + f = fdget(mqdes); + if (unlikely(!f.file)) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } info = MQUEUE_I(inode); - audit_inode(NULL, filp->f_path.dentry); + audit_inode(NULL, f.file->f_path.dentry); - if (unlikely(!(filp->f_mode & FMODE_READ))) { + if (unlikely(!(f.file->f_mode & FMODE_READ))) { ret = -EBADF; goto out_fput; } @@ -1131,7 +1130,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, } if (info->attr.mq_curmsgs == 0) { - if (filp->f_flags & O_NONBLOCK) { + if (f.file->f_flags & O_NONBLOCK) { spin_unlock(&info->lock); ret = -EAGAIN; } else { @@ -1161,7 +1160,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, free_msg(msg_ptr); } out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return ret; } @@ -1174,8 +1173,8 @@ out: SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, const struct sigevent __user *, u_notification) { - int ret, fput_needed; - struct file *filp; + int ret; + struct fd f; struct sock *sock; struct inode *inode; struct sigevent notification; @@ -1221,13 +1220,13 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, skb_put(nc, NOTIFY_COOKIE_LEN); /* and attach it to the socket */ retry: - filp = fget_light(notification.sigev_signo, &fput_needed); - if (!filp) { + f = fdget(notification.sigev_signo); + if (!f.file) { ret = -EBADF; goto out; } - sock = netlink_getsockbyfilp(filp); - fput_light(filp, fput_needed); + sock = netlink_getsockbyfilp(f.file); + fdput(f); if (IS_ERR(sock)) { ret = PTR_ERR(sock); sock = NULL; @@ -1246,14 +1245,14 @@ retry: } } - filp = fget_light(mqdes, &fput_needed); - if (!filp) { + f = fdget(mqdes); + if (!f.file) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } @@ -1293,7 +1292,7 @@ retry: } spin_unlock(&info->lock); out_fput: - fput_light(filp, fput_needed); + fdput(f); out: if (sock) { netlink_detachskb(sock, nc); @@ -1309,8 +1308,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, { int ret; struct mq_attr mqstat, omqstat; - int fput_needed; - struct file *filp; + struct fd f; struct inode *inode; struct mqueue_inode_info *info; @@ -1321,14 +1319,14 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, return -EINVAL; } - filp = fget_light(mqdes, &fput_needed); - if (!filp) { + f = fdget(mqdes); + if (!f.file) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } @@ -1337,15 +1335,15 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, spin_lock(&info->lock); omqstat = info->attr; - omqstat.mq_flags = filp->f_flags & O_NONBLOCK; + omqstat.mq_flags = f.file->f_flags & O_NONBLOCK; if (u_mqstat) { audit_mq_getsetattr(mqdes, &mqstat); - spin_lock(&filp->f_lock); + spin_lock(&f.file->f_lock); if (mqstat.mq_flags & O_NONBLOCK) - filp->f_flags |= O_NONBLOCK; + f.file->f_flags |= O_NONBLOCK; else - filp->f_flags &= ~O_NONBLOCK; - spin_unlock(&filp->f_lock); + f.file->f_flags &= ~O_NONBLOCK; + spin_unlock(&f.file->f_lock); inode->i_atime = inode->i_ctime = CURRENT_TIME; } @@ -1358,7 +1356,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, ret = -EFAULT; out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return ret; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 2bd199bfaef8..bd9c5bca42ae 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -467,14 +467,13 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, { struct perf_cgroup *cgrp; struct cgroup_subsys_state *css; - struct file *file; - int ret = 0, fput_needed; + struct fd f = fdget(fd); + int ret = 0; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - css = cgroup_css_from_dir(file, perf_subsys_id); + css = cgroup_css_from_dir(f.file, perf_subsys_id); if (IS_ERR(css)) { ret = PTR_ERR(css); goto out; @@ -500,7 +499,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, ret = -EINVAL; } out: - fput_light(file, fput_needed); + fdput(f); return ret; } @@ -3233,21 +3232,18 @@ unlock: static const struct file_operations perf_fops; -static struct file *perf_fget_light(int fd, int *fput_needed) +static inline int perf_fget_light(int fd, struct fd *p) { - struct file *file; - - file = fget_light(fd, fput_needed); - if (!file) - return ERR_PTR(-EBADF); + struct fd f = fdget(fd); + if (!f.file) + return -EBADF; - if (file->f_op != &perf_fops) { - fput_light(file, *fput_needed); - *fput_needed = 0; - return ERR_PTR(-EBADF); + if (f.file->f_op != &perf_fops) { + fdput(f); + return -EBADF; } - - return file; + *p = f; + return 0; } static int perf_event_set_output(struct perf_event *event, @@ -3279,22 +3275,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_SET_OUTPUT: { - struct file *output_file = NULL; - struct perf_event *output_event = NULL; - int fput_needed = 0; int ret; - if (arg != -1) { - output_file = perf_fget_light(arg, &fput_needed); - if (IS_ERR(output_file)) - return PTR_ERR(output_file); - output_event = output_file->private_data; + struct perf_event *output_event; + struct fd output; + ret = perf_fget_light(arg, &output); + if (ret) + return ret; + output_event = output.file->private_data; + ret = perf_event_set_output(event, output_event); + fdput(output); + } else { + ret = perf_event_set_output(event, NULL); } - - ret = perf_event_set_output(event, output_event); - if (output_event) - fput_light(output_file, fput_needed); - return ret; } @@ -6229,12 +6222,11 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_attr attr; struct perf_event_context *ctx; struct file *event_file = NULL; - struct file *group_file = NULL; + struct fd group = {NULL, 0}; struct task_struct *task = NULL; struct pmu *pmu; int event_fd; int move_group = 0; - int fput_needed = 0; int err; /* for future expandability... */ @@ -6269,12 +6261,10 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; if (group_fd != -1) { - group_file = perf_fget_light(group_fd, &fput_needed); - if (IS_ERR(group_file)) { - err = PTR_ERR(group_file); + err = perf_fget_light(group_fd, &group); + if (err) goto err_fd; - } - group_leader = group_file->private_data; + group_leader = group.file->private_data; if (flags & PERF_FLAG_FD_OUTPUT) output_event = group_leader; if (flags & PERF_FLAG_FD_NO_GROUP) @@ -6450,7 +6440,7 @@ SYSCALL_DEFINE5(perf_event_open, * of the group leader will find the pointer to itself in * perf_group_detach(). */ - fput_light(group_file, fput_needed); + fdput(group); fd_install(event_fd, event_file); return event_fd; @@ -6464,7 +6454,7 @@ err_task: if (task) put_task_struct(task); err_group_fd: - fput_light(group_file, fput_needed); + fdput(group); err_fd: put_unused_fd(event_fd); return err; diff --git a/kernel/sys.c b/kernel/sys.c index 0cb4283df884..f9492284e5d2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1788,15 +1788,15 @@ SYSCALL_DEFINE1(umask, int, mask) #ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { - struct file *exe_file; + struct fd exe; struct dentry *dentry; - int err, fput_needed; + int err; - exe_file = fget_light(fd, &fput_needed); - if (!exe_file) + exe = fdget(fd); + if (!exe.file) return -EBADF; - dentry = exe_file->f_path.dentry; + dentry = exe.file->f_path.dentry; /* * Because the original mm->exe_file points to executable file, make @@ -1805,7 +1805,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) */ err = -EACCES; if (!S_ISREG(dentry->d_inode->i_mode) || - exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) + exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; err = inode_permission(dentry->d_inode, MAY_EXEC); @@ -1839,12 +1839,12 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) goto exit_unlock; err = 0; - set_mm_exe_file(mm, exe_file); /* this grabs a reference to exe_file */ + set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ exit_unlock: up_write(&mm->mmap_sem); exit: - fput_light(exe_file, fput_needed); + fdput(exe); return err; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d0a32796550f..5116b7e5962e 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -415,16 +415,15 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) struct nlattr *na; size_t size; u32 fd; - struct file *file; - int fput_needed; + struct fd f; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return 0; size = nla_total_size(sizeof(struct cgroupstats)); @@ -444,7 +443,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) stats = nla_data(na); memset(stats, 0, sizeof(*stats)); - rc = cgroupstats_build(stats, file->f_dentry); + rc = cgroupstats_build(stats, f.file->f_dentry); if (rc < 0) { nlmsg_free(rep_skb); goto err; @@ -453,7 +452,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) rc = send_reply(rep_skb, info); err: - fput_light(file, fput_needed); + fdput(f); return rc; } diff --git a/mm/fadvise.c b/mm/fadvise.c index a83245763cf8..a47f0f50c89f 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -26,8 +26,7 @@ */ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) { - int fput_needed; - struct file *file = fget_light(fd, &fput_needed); + struct fd f = fdget(fd); struct address_space *mapping; struct backing_dev_info *bdi; loff_t endbyte; /* inclusive */ @@ -36,15 +35,15 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) unsigned long nrpages; int ret = 0; - if (!file) + if (!f.file) return -EBADF; - if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) { + if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) { ret = -ESPIPE; goto out; } - mapping = file->f_mapping; + mapping = f.file->f_mapping; if (!mapping || len < 0) { ret = -EINVAL; goto out; @@ -77,21 +76,21 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) switch (advice) { case POSIX_FADV_NORMAL: - file->f_ra.ra_pages = bdi->ra_pages; - spin_lock(&file->f_lock); - file->f_mode &= ~FMODE_RANDOM; - spin_unlock(&file->f_lock); + f.file->f_ra.ra_pages = bdi->ra_pages; + spin_lock(&f.file->f_lock); + f.file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&f.file->f_lock); break; case POSIX_FADV_RANDOM: - spin_lock(&file->f_lock); - file->f_mode |= FMODE_RANDOM; - spin_unlock(&file->f_lock); + spin_lock(&f.file->f_lock); + f.file->f_mode |= FMODE_RANDOM; + spin_unlock(&f.file->f_lock); break; case POSIX_FADV_SEQUENTIAL: - file->f_ra.ra_pages = bdi->ra_pages * 2; - spin_lock(&file->f_lock); - file->f_mode &= ~FMODE_RANDOM; - spin_unlock(&file->f_lock); + f.file->f_ra.ra_pages = bdi->ra_pages * 2; + spin_lock(&f.file->f_lock); + f.file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&f.file->f_lock); break; case POSIX_FADV_WILLNEED: /* First and last PARTIAL page! */ @@ -107,7 +106,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) * Ignore return value because fadvise() shall return * success even if filesystem can't retrieve a hint, */ - force_page_cache_readahead(mapping, file, start_index, + force_page_cache_readahead(mapping, f.file, start_index, nrpages); break; case POSIX_FADV_NOREUSE: @@ -129,7 +128,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) ret = -EINVAL; } out: - fput_light(file, fput_needed); + fdput(f); return ret; } #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS diff --git a/mm/readahead.c b/mm/readahead.c index 1011111e2bf4..7963f2391236 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -579,20 +579,19 @@ do_readahead(struct address_space *mapping, struct file *filp, SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) { ssize_t ret; - struct file *file; - int fput_needed; + struct fd f; ret = -EBADF; - file = fget_light(fd, &fput_needed); - if (file) { - if (file->f_mode & FMODE_READ) { - struct address_space *mapping = file->f_mapping; + f = fdget(fd); + if (f.file) { + if (f.file->f_mode & FMODE_READ) { + struct address_space *mapping = f.file->f_mapping; pgoff_t start = offset >> PAGE_CACHE_SHIFT; pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; unsigned long len = end - start + 1; - ret = do_readahead(mapping, file, start, len); + ret = do_readahead(mapping, f.file, start, len); } - fput_light(file, fput_needed); + fdput(f); } return ret; } -- cgit v1.2.3 From 1fe0c0230a7c2d5f4061e681a3f3be9512446d23 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 19 Sep 2012 15:49:51 +0100 Subject: vfs: delete surplus inode NULL check Each iteration of d_delete we reload inode from dentry->d_inode and then call S_ISDIR(inode-i_mode), so inode cannot possibly be NULL shortly afterwards unless something went horribly wrong. Signed-off-by: Alan Cox Signed-off-by: Al Viro --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 16521a9f2038..fbee67b92651 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2109,7 +2109,7 @@ again: inode = dentry->d_inode; isdir = S_ISDIR(inode->i_mode); if (dentry->d_count == 1) { - if (inode && !spin_trylock(&inode->i_lock)) { + if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); goto again; -- cgit v1.2.3 From 2744c171dbaa0b1ec7639e7d0ff817fba9461a38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Sep 2012 21:41:05 -0400 Subject: ceph: don't abuse d_delete() on failure exits Signed-off-by: Al Viro --- fs/ceph/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4b5762ef7c2b..ba95eea201bf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, pr_err("fill_trace bad get_inode " "%llx.%llx\n", vino.ino, vino.snap); err = PTR_ERR(in); - d_delete(dn); + d_drop(dn); goto done; } dn = splice_dentry(dn, in, &have_lease, true); @@ -1277,7 +1277,7 @@ retry_lookup: in = ceph_get_inode(parent->d_sb, vino); if (IS_ERR(in)) { dout("new_inode badness\n"); - d_delete(dn); + d_drop(dn); dput(dn); err = PTR_ERR(in); goto out; -- cgit v1.2.3 From 63784dd02b2ac85e867be9d6a6f5536c4ff738be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Sep 2012 21:43:05 -0400 Subject: fcntl: fix misannotations __user * != * __user... Signed-off-by: Al Viro --- fs/fcntl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 91af39a33af7..8f704291d4ed 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -149,7 +149,7 @@ pid_t f_getown(struct file *filp) static int f_setown_ex(struct file *filp, unsigned long arg) { - struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; struct pid *pid; int type; @@ -189,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) static int f_getown_ex(struct file *filp, unsigned long arg) { - struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; int ret = 0; @@ -227,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) static int f_getowner_uids(struct file *filp, unsigned long arg) { struct user_namespace *user_ns = current_user_ns(); - uid_t * __user dst = (void * __user)arg; + uid_t __user *dst = (void __user *)arg; uid_t src[2]; int err; -- cgit v1.2.3 From f34f9d186df35e5c39163444c43b4fc6255e39c5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 26 Sep 2012 11:34:50 +1000 Subject: coredump: prevent double-free on an error path in core dumper In !CORE_DUMP_USE_REGSET case, if elf_note_info_init fails to allocate memory for info->fields, it frees already allocated stuff and returns error to its caller, fill_note_info. Which in turn returns error to its caller, elf_core_dump. Which jumps to cleanup label and calls free_note_info, which will happily try to free all info->fields again. BOOM. This is the fix. Signed-off-by: Oleg Nesterov Signed-off-by: Denys Vlasenko Cc: Venu Byravarasu Cc: Signed-off-by: Andrew Morton --- fs/binfmt_elf.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1b52956afe33..0225fddf49b7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1696,30 +1696,19 @@ static int elf_note_info_init(struct elf_note_info *info) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); if (!info->psinfo) - goto notes_free; + return 0; info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); if (!info->prstatus) - goto psinfo_free; + return 0; info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); if (!info->fpu) - goto prstatus_free; + return 0; #ifdef ELF_CORE_COPY_XFPREGS info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); if (!info->xfpu) - goto fpu_free; + return 0; #endif return 1; -#ifdef ELF_CORE_COPY_XFPREGS - fpu_free: - kfree(info->fpu); -#endif - prstatus_free: - kfree(info->prstatus); - psinfo_free: - kfree(info->psinfo); - notes_free: - kfree(info->notes); - return 0; } static int fill_note_info(struct elfhdr *elf, int phdrs, -- cgit v1.2.3 From aaf7d73e54b6915310ece11aedb19ec06a833642 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 26 Sep 2012 22:21:21 -0400 Subject: ext4: enable FITRIM ioctl on bigalloc file system With a minor tweaks regarding minimum extent size to discard and discarded bytes reporting the FITRIM can be enabled on bigalloc file system and it works without any problem. This patch fixes minlen handling and discarded bytes reporting to take into consideration bigalloc enabled file systems and finally removes the restriction and allow FITRIM to be used on file system with bigalloc feature enabled. Reviewed-by: Carlos Maiolino Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 7 ------- fs/ext4/mballoc.c | 5 +++-- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 8b84fe28ccaf..4c8174aa685c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -404,13 +404,6 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { - ext4_msg(sb, KERN_ERR, - "FITRIM not supported with bigalloc"); - return -EOPNOTSUPP; - } - if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8ec6f88b7a37..a415465f97a0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4990,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start = range->start >> sb->s_blocksize_bits; end = start + (range->len >> sb->s_blocksize_bits) - 1; - minlen = range->minlen >> sb->s_blocksize_bits; + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + range->minlen >> sb->s_blocksize_bits); if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || unlikely(start >= max_blks)) @@ -5050,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); out: - range->len = trimmed * sb->s_blocksize; + range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; return ret; } -- cgit v1.2.3 From 9b68733273665a4c0d98041a657dabfb4fd6bd80 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 26 Sep 2012 22:58:50 -0400 Subject: ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails When the EXT4_IOC_MOVE_EXT ioctl() fails on bigalloc file systems, we should jump to the 'mext_out' label to release the donor file reference. Signed-off-by: Djalal Harouni Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 4c8174aa685c..17c53a69454d 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -258,7 +258,8 @@ group_extend_out: EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto mext_out; } err = mnt_want_write_file(filp); -- cgit v1.2.3 From 760ad0cac198356c1148cad7531c1a6138322493 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 25 Sep 2012 11:00:07 +0400 Subject: CIFS: Make ops->close return void Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 ++- fs/cifs/file.c | 5 ++--- fs/cifs/smb1ops.c | 4 ++-- fs/cifs/smb2ops.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a39e5b7fc844..f6f40635abca 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -316,7 +316,8 @@ struct smb_version_operations { /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ - int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); + void (*close)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* async read from the server */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 075f7cfd1da5..7d7bbdc4c8e7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -355,12 +355,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; - int rc = -ENOSYS; xid = get_xid(); if (server->ops->close) - rc = server->ops->close(xid, tcon, &cifs_file->fid); - free_xid(xid); + server->ops->close(xid, tcon, &cifs_file->fid); + _free_xid(xid); } cifs_del_pending_open(&open); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5fb0fe5f72b6..42dccbb57c40 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -720,11 +720,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = cinode->clientCanCacheAll; } -static int +static void cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - return CIFSSMBClose(xid, tcon, fid->netfid); + CIFSSMBClose(xid, tcon, fid->netfid); } static int diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 2183bb343edd..1570cbea4a49 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -374,11 +374,11 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = cinode->clientCanCacheAll; } -static int +static void smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } static int -- cgit v1.2.3 From b794e7a6ebfbddb819b0e75ab59ada6b08a285f2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 26 Sep 2012 23:11:13 -0400 Subject: jbd2: fix assertion failure in commit code due to lacking transaction credits ext4 users of data=journal mode with blocksize < pagesize were occasionally hitting assertion failure in jbd2_journal_commit_transaction() checking whether the transaction has at least as many credits reserved as buffers attached. The core of the problem is that when a file gets truncated, buffers that still need checkpointing or that are attached to the committing transaction are left with buffer_mapped set. When this happens to buffers beyond i_size attached to a page stradding i_size, subsequent write extending the file will see these buffers and as they are mapped (but underlying blocks were freed) things go awry from here. The assertion failure just coincidentally (and in this case luckily as we would start corrupting filesystem) triggers due to journal_head not being properly cleaned up as well. We fix the problem by unmapping buffers if possible (in lots of cases we just need a buffer attached to a transaction as a place holder but it must not be written out anyway). And in one case, we just have to bite the bullet and wait for transaction commit to finish. CC: Josef Bacik Signed-off-by: Jan Kara --- fs/jbd2/commit.c | 40 ++++++++++++++++++++++--------- fs/jbd2/transaction.c | 65 +++++++++++++++++++++++++++++++++++---------------- 2 files changed, 74 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index af5280fb579b..3091d42992f0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1014,17 +1014,35 @@ restart_loop: * there's no point in keeping a checkpoint record for * it. */ - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future after the "add to orphan" - * operation been committed, That's not only a performance - * gain, it also stops aliasing problems if the buffer is - * left behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh) && !jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + /* + * A buffer which has been freed while still being journaled by + * a previous transaction. + */ + if (buffer_freed(bh)) { + /* + * If the running transaction is the one containing + * "add to orphan" operation (b_next_transaction != + * NULL), we have to wait for that transaction to + * commit before we can really get rid of the buffer. + * So just clear b_modified to not confuse transaction + * credit accounting and refile the buffer to + * BJ_Forget of the running transaction. If the just + * committed transaction contains "add to orphan" + * operation, we can completely invalidate the buffer + * now. We are rather through in that since the + * buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial + * page. + */ + jh->b_modified = 0; + if (!jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index fb1ab9533b67..a74ba4659549 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * We're outside-transaction here. Either or both of j_running_transaction * and j_committing_transaction may be NULL. */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + int partial_page) { transaction_t *transaction; struct journal_head *jh; int may_free = 1; - int ret; BUFFER_TRACE(bh, "entry"); +retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * clear the buffer dirty bit at latest at the moment when the * transaction marking the buffer as freed in the filesystem * structures is committed because from that moment on the - * buffer can be reallocated and used by a different page. + * block can be reallocated and used by a different page. * Since the block hasn't been freed yet but the inode has * already been added to orphan list, it is safe for us to add * the buffer to BJ_Forget list of the newest transaction. + * + * Also we have to clear buffer_mapped flag of a truncated buffer + * because the buffer_head may be attached to the page straddling + * i_size (can happen only when blocksize < pagesize) and thus the + * buffer_head can be reused when the file is extended again. So we end + * up keeping around invalidated buffers attached to transactions' + * BJ_Forget list just to stop checkpointing code from cleaning up + * the transaction this buffer was modified in. */ transaction = jh->b_transaction; if (transaction == NULL) { @@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * committed, the buffer won't be needed any * longer. */ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_running_transaction); - jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - write_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* There is no currently-running transaction. So the * orphan record which we wrote for this file must have @@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * the committing transaction, if it exists. */ if (journal->j_committing_transaction) { JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_committing_transaction); - jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - write_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* The orphan record's transaction has * committed. We can cleanse this buffer */ @@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) JBUFFER_TRACE(jh, "on committing transaction"); /* * The buffer is committing, we simply cannot touch - * it. So we just set j_next_transaction to the - * running transaction (if there is one) and mark - * buffer as freed so that commit code knows it should - * clear dirty bits when it is done with the buffer. + * it. If the page is straddling i_size we have to wait + * for commit and try again. + */ + if (partial_page) { + tid_t tid = journal->j_committing_transaction->t_tid; + + jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + write_unlock(&journal->j_state_lock); + jbd2_log_wait_commit(journal, tid); + goto retry; + } + /* + * OK, buffer won't be reachable after truncate. We just set + * j_next_transaction to the running transaction (if there is + * one) and mark buffer as freed so that commit code knows it + * should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) @@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: + /* + * This is tricky. Although the buffer is truncated, it may be reused + * if blocksize < pagesize and it is attached to the page straddling + * EOF. Since the buffer might have been added to BJ_Forget list of the + * running transaction, journal_get_write_access() won't clear + * b_modified and credit accounting gets confused. So clear b_modified + * here. + */ + jh->b_modified = 0; jbd2_journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); @@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); + may_free &= journal_unmap_buffer(journal, bh, + offset > 0); unlock_buffer(bh); } curr_off = next_off; -- cgit v1.2.3 From 4ca3a99ca4bf8f5dcfc4fef4f2b1d8322bb60ad9 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 25 Sep 2012 11:00:09 +0400 Subject: CIFS: Fix possible freed pointer dereference in SMB2_sess_setup and remove redundant (rsp == NULL) checks after SendReceive2. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a7db95f4760c..5ad88b4b9990 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -409,11 +409,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if (rc != 0) goto neg_exit; - if (rsp == NULL) { - rc = -EIO; - goto neg_exit; - } - cFYI(1, "mode 0x%x", rsp->SecurityMode); if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name) @@ -637,13 +632,14 @@ ssetup_ntlmssp_authenticate: kfree(security_blob); rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; - if (rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { + if (resp_buftype != CIFS_NO_BUFFER && + rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { if (phase != NtLmNegotiate) { cERROR(1, "Unexpected more processing error"); goto ssetup_exit; } if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != - le16_to_cpu(rsp->SecurityBufferOffset)) { + le16_to_cpu(rsp->SecurityBufferOffset)) { cERROR(1, "Invalid security buffer offset %d", le16_to_cpu(rsp->SecurityBufferOffset)); rc = -EIO; @@ -669,11 +665,6 @@ ssetup_ntlmssp_authenticate: if (rc != 0) goto ssetup_exit; - if (rsp == NULL) { - rc = -EIO; - goto ssetup_exit; - } - ses->session_flags = le16_to_cpu(rsp->SessionFlags); ssetup_exit: free_rsp_buf(resp_buftype, rsp); @@ -793,11 +784,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_error_exit; } - if (rsp == NULL) { - rc = -EIO; - goto tcon_exit; - } - if (tcon == NULL) { ses->ipc_tid = rsp->hdr.TreeId; goto tcon_exit; @@ -1046,10 +1032,6 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, goto creat_exit; } - if (rsp == NULL) { - rc = -EIO; - goto creat_exit; - } *persistent_fid = rsp->PersistentFileId; *volatile_fid = rsp->VolatileFileId; @@ -1111,11 +1093,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, goto close_exit; } - if (rsp == NULL) { - rc = -EIO; - goto close_exit; - } - /* BB FIXME - decode close response, update inode for caching */ close_exit: @@ -1950,12 +1927,6 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); goto out; } - - if (rsp == NULL) { - rc = -EIO; - goto out; - } - out: free_rsp_buf(resp_buftype, rsp); kfree(iov); -- cgit v1.2.3 From f065fd099fc475333fc7a55677a7f64764445d55 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 25 Sep 2012 11:00:08 +0400 Subject: CIFS: Fix possible freed pointer dereference in CIFS_SessSetup Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/sess.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 382c06d01b38..76809f4d3428 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -876,7 +876,8 @@ ssetup_ntlmssp_authenticate: pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; smb_buf = (struct smb_hdr *)iov[0].iov_base; - if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError == + if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) && + (smb_buf->Status.CifsError == cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { if (phase != NtLmNegotiate) { cERROR(1, "Unexpected more processing error"); -- cgit v1.2.3 From c25f9bc6143f4cb4dc31d2ad7a6fe4e4005fc414 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 26 Sep 2012 23:30:12 -0400 Subject: ext4: don't clear orphan list on ro mount with errors If the file system contains errors and it is being mounted read-only, don't clear the orphan list. We should minimize changes to the file system if it is mounted read-only. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7bef0a4bc518..a53a23aef5e3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2177,10 +2177,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, } if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { - if (es->s_last_orphan) + /* don't clear list on RO mount w/ errors */ + if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { jbd_debug(1, "Errors on filesystem, " "clearing orphan list.\n"); - es->s_last_orphan = 0; + es->s_last_orphan = 0; + } jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); return; } -- cgit v1.2.3 From cbb4ee830e0057f8d60b4e0a8c4b6daa6cdd28d7 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Thu, 27 Sep 2012 08:00:01 -0400 Subject: ext4: remove redundant offset check in mext_check_arguments() In the check code above, if orig_start != donor_start, we would return -EINVAL. So here, orig_start should be equal with donor_start. Remove the redundant check here. Signed-off-by: Wang Sheng-Hui Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c2e47da7c2ba..cee4bd066b7a 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1134,7 +1134,6 @@ mext_check_arguments(struct inode *orig_inode, } if ((orig_start >= EXT_MAX_BLOCKS) || - (donor_start >= EXT_MAX_BLOCKS) || (*len > EXT_MAX_BLOCKS) || (orig_start + *len >= EXT_MAX_BLOCKS)) { ext4_debug("ext4 move extent: Can't handle over [%u] blocks " -- cgit v1.2.3 From 6d1ab10e69ff5f3cb63920ba965ec0f1f0bdaf8d Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 27 Sep 2012 09:31:33 -0400 Subject: ext4: ext4_bread usage audit When ext4_bread() returns NULL and err is set to zero, this means there is no phyical block mapped to the specified logical block number. (Previous to commit 90b0a97323, err was uninitialized in this case, which caused other problems.) The directory handling routines use ext4_bread() in many places, the fact that ext4_bread() now returns NULL with err set to zero could cause problems since a number of these functions will simply return the value of err if the result of ext4_bread() was the NULL pointer, causing the caller of the function to think that the function was successful. Since directories should never contain holes, this case can only happen if the file system is corrupted. This commit audits all of the callers of ext4_bread(), and makes sure they do the right thing if a hole in a directory is found by ext4_bread(). Some ext4_bread() callers did not need any changes either because they already had its own hole detector paths. Signed-off-by: Carlos Maiolino Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index bd87b7a66afb..6d600a69fc9d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -74,6 +74,12 @@ static struct buffer_head *ext4_append(handle_t *handle, bh = NULL; } } + if (!bh && !(*err)) { + *err = -EIO; + ext4_error(inode->i_sb, + "Directory hole detected on inode %lu\n", + inode->i_ino); + } return bh; } @@ -601,8 +607,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir, u32 hash; frame->bh = NULL; - if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) + if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) { + if (*err == 0) + *err = ERR_BAD_DX_DIR; goto fail; + } root = (struct dx_root *) bh->b_data; if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && @@ -703,8 +712,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir, frame->entries = entries; frame->at = at; if (!indirect--) return frame; - if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) + if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) { + if (!(*err)) + *err = ERR_BAD_DX_DIR; goto fail2; + } at = entries = ((struct dx_node *) bh->b_data)->entries; if (!buffer_verified(bh) && @@ -814,8 +826,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, */ while (num_frames--) { if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), - 0, &err))) + 0, &err))) { + if (!err) { + ext4_error(dir->i_sb, + "Directory hole detected on inode %lu\n", + dir->i_ino); + return -EIO; + } return err; /* Failure */ + } if (!buffer_verified(bh) && !ext4_dx_csum_verify(dir, @@ -850,8 +869,15 @@ static int htree_dirblock_to_tree(struct file *dir_file, dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) + if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) { + if (!err) { + err = -EIO; + ext4_error(dir->i_sb, + "Directory hole detected on inode %lu\n", + dir->i_ino); + } return err; + } if (!buffer_verified(bh) && !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) @@ -1274,8 +1300,15 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q return NULL; do { block = dx_get_block(frame->at); - if (!(bh = ext4_bread(NULL, dir, block, 0, err))) + if (!(bh = ext4_bread(NULL, dir, block, 0, err))) { + if (!(*err)) { + *err = -EIO; + ext4_error(dir->i_sb, + "Directory hole detected on inode %lu\n", + dir->i_ino); + } goto errout; + } if (!buffer_verified(bh) && !ext4_dirent_csum_verify(dir, @@ -1808,9 +1841,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { - bh = ext4_bread(handle, dir, block, 0, &retval); - if(!bh) + if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) { + if (!retval) { + retval = -EIO; + ext4_error(inode->i_sb, + "Directory hole detected on inode %lu\n", + inode->i_ino); + } return retval; + } if (!buffer_verified(bh) && !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) @@ -1867,8 +1906,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, entries = frame->entries; at = frame->at; - if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) + if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) { + if (!err) { + err = -EIO; + ext4_error(dir->i_sb, + "Directory hole detected on inode %lu\n", + dir->i_ino); + } goto cleanup; + } if (!buffer_verified(bh) && !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) @@ -2204,9 +2250,15 @@ retry: inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext4_bread(handle, inode, 0, 1, &err); - if (!dir_block) + if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) { + if (!err) { + err = -EIO; + ext4_error(inode->i_sb, + "Directory hole detected on inode %lu\n", + inode->i_ino); + } goto out_clear_inode; + } BUFFER_TRACE(dir_block, "get_write_access"); err = ext4_journal_get_write_access(handle, dir_block); if (err) @@ -2323,6 +2375,11 @@ static int empty_dir(struct inode *inode) EXT4_ERROR_INODE(inode, "error %d reading directory " "lblock %u", err, lblock); + else + ext4_warning(inode->i_sb, + "bad directory (dir #%lu) - no data block", + inode->i_ino); + offset += sb->s_blocksize; continue; } @@ -2830,9 +2887,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, goto end_rename; } retval = -EIO; - dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); - if (!dir_bh) + if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) { + if (!retval) { + retval = -EIO; + ext4_error(old_inode->i_sb, + "Directory hole detected on inode %lu\n", + old_inode->i_ino); + } goto end_rename; + } if (!buffer_verified(dir_bh) && !ext4_dirent_csum_verify(old_inode, (struct ext4_dir_entry *)dir_bh->b_data)) -- cgit v1.2.3 From ba39ebb61401cfe0ccd58dd0cd4da88465528c0a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 27 Sep 2012 09:37:53 -0400 Subject: ext4: convert to use leXX_add_cpu() Convert cpu_to_leXX(leXX_to_cpu(E1) + E2) to use leXX_add_cpu(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- fs/ext4/move_extent.c | 5 ++--- fs/ext4/super.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cbcc6b3f2ae0..8f08c7b77179 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), ext4_idx_pblock(EXT_FIRST_INDEX(neh))); - neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1); + le16_add_cpu(&neh->eh_depth, 1); ext4_mark_inode_dirty(handle, inode); out: brelse(bh); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index cee4bd066b7a..8076b96b5299 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -570,9 +570,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, diff = donor_off - le32_to_cpu(tmp_dext->ee_block); ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); - tmp_dext->ee_block = - cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); - tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); + le32_add_cpu(&tmp_dext->ee_block, diff); + le16_add_cpu(&tmp_dext->ee_len, -diff); if (max_count < ext4_ext_get_actual_len(tmp_dext)) tmp_dext->ee_len = cpu_to_le16(max_count); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a53a23aef5e3..ae456321c5bf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -431,7 +431,7 @@ static void __save_error_info(struct super_block *sb, const char *func, */ if (!es->s_error_count) mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); - es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); + le32_add_cpu(&es->s_error_count, 1); } static void save_error_info(struct super_block *sb, const char *func, -- cgit v1.2.3 From fd51790949edbbd17633689d4e19fe26d8447764 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Sep 2012 16:35:51 -0400 Subject: trivial select_parent documentation fix "Search list for X" sounds like you're trying to find X on a list. Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 16521a9f2038..0364af2311f4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1141,7 +1141,7 @@ rename_retry: EXPORT_SYMBOL(have_submounts); /* - * Search the dentry child list for the specified parent, + * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_subdirs list is non-empty and continue -- cgit v1.2.3 From c052e2b423f3eabe9f3f32e60744afa5cf26f6b9 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Fri, 28 Sep 2012 12:21:14 -0500 Subject: cifs: obtain file access during backup intent lookup (resend) Rebased and resending the patch. Path based queries can fail for lack of access, especially during lookup during open. open itself would actually succeed becasue of back up intent bit but queries (either path or file handle based) do not have a means to specifiy backup intent bit. So query the file info during lookup using trans2 / findfirst / file_id_full_dir_info to obtain file info as well as file_id/inode value. Signed-off-by: Shirish Pargaonkar Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 6 +++-- fs/cifs/cifssmb.c | 43 +++++++++++++++++++++-------------- fs/cifs/inode.c | 64 +++++++++++++++++++++++++++++++++++++++-------------- fs/cifs/readdir.c | 2 +- fs/cifs/smb1ops.c | 6 ++--- 5 files changed, 80 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 09ea6321c55a..5144e9fbeb8c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -140,6 +140,8 @@ void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr); extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, struct cifs_sb_info *cifs_sb); +extern void cifs_dir_info_to_fattr(struct cifs_fattr *, FILE_DIRECTORY_INFO *, + struct cifs_sb_info *); extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); extern struct inode *cifs_iget(struct super_block *sb, struct cifs_fattr *fattr); @@ -216,10 +218,10 @@ extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *); extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, - const char *searchName, const struct nls_table *nls_codepage, + const char *searchName, struct cifs_sb_info *cifs_sb, __u16 *searchHandle, __u16 search_flags, struct cifs_search_info *psrch_inf, - int map, const char dirsep); + bool msearch); extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, __u16 searchHandle, __u16 search_flags, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 88bbb3ef95b3..76d0d2998850 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4214,10 +4214,9 @@ UnixQPathInfoRetry: /* xid, tcon, searchName and codepage are input parms, rest are returned */ int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, - const char *searchName, - const struct nls_table *nls_codepage, + const char *searchName, struct cifs_sb_info *cifs_sb, __u16 *pnetfid, __u16 search_flags, - struct cifs_search_info *psrch_inf, int remap, const char dirsep) + struct cifs_search_info *psrch_inf, bool msearch) { /* level 257 SMB_ */ TRANSACTION2_FFIRST_REQ *pSMB = NULL; @@ -4225,8 +4224,9 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, T2_FFIRST_RSP_PARMS *parms; int rc = 0; int bytes_returned = 0; - int name_len; + int name_len, remap; __u16 params, byte_count; + struct nls_table *nls_codepage; cFYI(1, "In FindFirst for %s", searchName); @@ -4236,6 +4236,9 @@ findFirstRetry: if (rc) return rc; + nls_codepage = cifs_sb->local_nls; + remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, @@ -4244,24 +4247,29 @@ findFirstRetry: it got remapped to 0xF03A as if it were part of the directory name instead of a wildcard */ name_len *= 2; - pSMB->FileName[name_len] = dirsep; - pSMB->FileName[name_len+1] = 0; - pSMB->FileName[name_len+2] = '*'; - pSMB->FileName[name_len+3] = 0; - name_len += 4; /* now the trailing null */ - pSMB->FileName[name_len] = 0; /* null terminate just in case */ - pSMB->FileName[name_len+1] = 0; - name_len += 2; + if (msearch) { + pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb); + pSMB->FileName[name_len+1] = 0; + pSMB->FileName[name_len+2] = '*'; + pSMB->FileName[name_len+3] = 0; + name_len += 4; /* now the trailing null */ + /* null terminate just in case */ + pSMB->FileName[name_len] = 0; + pSMB->FileName[name_len+1] = 0; + name_len += 2; + } } else { /* BB add check for overrun of SMB buf BB */ name_len = strnlen(searchName, PATH_MAX); /* BB fix here and in unicode clause above ie if (name_len > buffersize-header) free buffer exit; BB */ strncpy(pSMB->FileName, searchName, name_len); - pSMB->FileName[name_len] = dirsep; - pSMB->FileName[name_len+1] = '*'; - pSMB->FileName[name_len+2] = 0; - name_len += 3; + if (msearch) { + pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb); + pSMB->FileName[name_len+1] = '*'; + pSMB->FileName[name_len+2] = 0; + name_len += 3; + } } params = 12 + name_len /* includes null */ ; @@ -4349,7 +4357,8 @@ findFirstRetry: psrch_inf->last_entry = psrch_inf->srch_entries_start + lnoff; - *pnetfid = parms->SearchHandle; + if (pnetfid) + *pnetfid = parms->SearchHandle; } else { cifs_buf_release(pSMB); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3d155875f446..afdff79651f1 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -607,7 +607,9 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, int xid, const __u16 *fid) { - int rc = 0, tmprc; + bool validinum = false; + __u16 srchflgs; + int rc = 0, tmprc = ENOSYS; struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct tcon_link *tlink; @@ -615,6 +617,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, char *buf = NULL; bool adjust_tz = false; struct cifs_fattr fattr; + struct cifs_search_info *srchinf = NULL; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -653,9 +656,38 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; - } else { + } else if (rc == -EACCES && backup_cred(cifs_sb)) { + srchinf = kzalloc(sizeof(struct cifs_search_info), + GFP_KERNEL); + if (srchinf == NULL) { + rc = -ENOMEM; + goto cgii_exit; + } + + srchinf->endOfSearch = false; + srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; + + srchflgs = CIFS_SEARCH_CLOSE_ALWAYS | + CIFS_SEARCH_CLOSE_AT_END | + CIFS_SEARCH_BACKUP_SEARCH; + + rc = CIFSFindFirst(xid, tcon, full_path, + cifs_sb, NULL, srchflgs, srchinf, false); + if (!rc) { + data = + (FILE_ALL_INFO *)srchinf->srch_entries_start; + + cifs_dir_info_to_fattr(&fattr, + (FILE_DIRECTORY_INFO *)data, cifs_sb); + fattr.cf_uniqueid = le64_to_cpu( + ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); + validinum = true; + + cifs_buf_release(srchinf->ntwrk_buf_start); + } + kfree(srchinf); + } else goto cgii_exit; - } /* * If an inode wasn't passed in, then get the inode number @@ -666,23 +698,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, */ if (*inode == NULL) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - if (server->ops->get_srv_inum) - tmprc = server->ops->get_srv_inum(xid, tcon, - cifs_sb, full_path, &fattr.cf_uniqueid, - data); - else - tmprc = -ENOSYS; - if (tmprc || !fattr.cf_uniqueid) { - cFYI(1, "GetSrvInodeNum rc %d", tmprc); - fattr.cf_uniqueid = iunique(sb, ROOT_I); - cifs_autodisable_serverino(cifs_sb); + if (validinum == false) { + if (server->ops->get_srv_inum) + tmprc = server->ops->get_srv_inum(xid, + tcon, cifs_sb, full_path, + &fattr.cf_uniqueid, data); + if (tmprc) { + cFYI(1, "GetSrvInodeNum rc %d", tmprc); + fattr.cf_uniqueid = iunique(sb, ROOT_I); + cifs_autodisable_serverino(cifs_sb); + } } - } else { + } else fattr.cf_uniqueid = iunique(sb, ROOT_I); - } - } else { + } else fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; - } /* query for SFU type info if supported and needed */ if (fattr.cf_cifsattrs & ATTR_SYSTEM && diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b0f4a428398d..f9b5d3d6cf33 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -151,7 +151,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) } } -static void +void cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, struct cifs_sb_info *cifs_sb) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 42dccbb57c40..56cc4be87807 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -837,10 +837,8 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid, __u16 search_flags, struct cifs_search_info *srch_inf) { - return CIFSFindFirst(xid, tcon, path, cifs_sb->local_nls, - &fid->netfid, search_flags, srch_inf, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); + return CIFSFindFirst(xid, tcon, path, cifs_sb, + &fid->netfid, search_flags, srch_inf, true); } static int -- cgit v1.2.3 From f45ee3a1ea438af96e4fd2c0b16d195e67ef235f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Fri, 28 Sep 2012 23:21:09 -0400 Subject: ext4: ext4_inode_info diet Generic inode has unused i_private pointer which may be used as cur_aio_dio storage. TODO: If cur_aio_dio will be passed as an argument to get_block_t this allow to have concurent AIO_DIO requests. Reviewed-by: Zheng Liu Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 12 ++++++++++-- fs/ext4/extents.c | 4 ++-- fs/ext4/inode.c | 6 +++--- fs/ext4/super.c | 1 - 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8b6902c4d7be..3e8e185e5e22 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -912,8 +912,6 @@ struct ext4_inode_info { struct list_head i_completed_io_list; spinlock_t i_completed_io_lock; atomic_t i_ioend_count; /* Number of outstanding io_end structs */ - /* current io_end structure for async DIO write*/ - ext4_io_end_t *cur_aio_dio; atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ spinlock_t i_block_reservation_lock; @@ -1338,6 +1336,16 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, } } +static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) +{ + return inode->i_private; +} + +static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io) +{ + inode->i_private = io; +} + /* * Inode dynamic state flags */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8f08c7b77179..a1f56c3e773b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3618,7 +3618,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, { int ret = 0; int err = 0; - ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; + ext4_io_end_t *io = ext4_inode_aio(inode); ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " "block %llu, max_blocks %u, flags %x, allocated %u\n", @@ -3876,7 +3876,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; struct ext4_allocation_request ar; - ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; + ext4_io_end_t *io = ext4_inode_aio(inode); ext4_lblk_t cluster_offset; ext_debug("blocks %u/%u requested for inode %lu\n", diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4df5e95801b4..a99588673566 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3056,7 +3056,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * hook to the iocb. */ iocb->private = NULL; - EXT4_I(inode)->cur_aio_dio = NULL; + ext4_inode_aio_set(inode, NULL); if (!is_sync_kiocb(iocb)) { ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); @@ -3073,7 +3073,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * is a unwritten extents needs to be converted * when IO is completed. */ - EXT4_I(inode)->cur_aio_dio = iocb->private; + ext4_inode_aio_set(inode, io_end); } if (overwrite) @@ -3093,7 +3093,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, NULL, DIO_LOCKING); if (iocb->private) - EXT4_I(inode)->cur_aio_dio = NULL; + ext4_inode_aio_set(inode, NULL); /* * The io_end structure takes a reference to the inode, * that structure needs to be destroyed and the diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ae456321c5bf..e05267ab1f81 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -965,7 +965,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->jinode = NULL; INIT_LIST_HEAD(&ei->i_completed_io_list); spin_lock_init(&ei->i_completed_io_lock); - ei->cur_aio_dio = NULL; ei->i_sync_tid = 0; ei->i_datasync_tid = 0; atomic_set(&ei->i_ioend_count, 0); -- cgit v1.2.3 From e27f41e1b789e60e7d8cc9c81fd93ca49ef31f13 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Fri, 28 Sep 2012 23:24:52 -0400 Subject: ext4: give i_aiodio_unwritten a more appropriate name AIO/DIO prefix is wrong because it account unwritten extents which also may be scheduled from buffered write endio Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 4 ++-- fs/ext4/file.c | 6 +++--- fs/ext4/page-io.c | 2 +- fs/ext4/super.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3e8e185e5e22..6ec9856065d6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -912,7 +912,7 @@ struct ext4_inode_info { struct list_head i_completed_io_list; spinlock_t i_completed_io_lock; atomic_t i_ioend_count; /* Number of outstanding io_end structs */ - atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ + atomic_t i_unwritten; /* Nr. of inflight conversions pending */ spinlock_t i_block_reservation_lock; @@ -1332,7 +1332,7 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { io_end->flag |= EXT4_IO_END_UNWRITTEN; - atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + atomic_inc(&EXT4_I(inode)->i_unwritten); } } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3b0e3bdaabfc..39335bda404b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp) return 0; } -static void ext4_aiodio_wait(struct inode *inode) +static void ext4_unwritten_wait(struct inode *inode) { wait_queue_head_t *wq = ext4_ioend_wq(inode); - wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); + wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0)); } /* @@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, "performance will be poor.", inode->i_ino, current->comm); mutex_lock(ext4_aio_mutex(inode)); - ext4_aiodio_wait(inode); + ext4_unwritten_wait(inode); } BUG_ON(iocb->ki_pos != pos); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index dcdeef169a69..de77e31cc119 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -113,7 +113,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) if (io->flag & EXT4_IO_END_DIRECT) inode_dio_done(inode); /* Wake up anyone waiting on unwritten extent conversion */ - if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) + if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) wake_up_all(ext4_ioend_wq(io->inode)); return ret; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e05267ab1f81..982f6fc22c88 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -968,7 +968,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_sync_tid = 0; ei->i_datasync_tid = 0; atomic_set(&ei->i_ioend_count, 0); - atomic_set(&ei->i_aiodio_unwritten, 0); + atomic_set(&ei->i_unwritten, 0); return &ei->vfs_inode; } -- cgit v1.2.3 From 82e54229118785badffb4ef5ba4803df25fe007f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Fri, 28 Sep 2012 23:36:25 -0400 Subject: ext4: fix unwritten counter leakage ext4_set_io_unwritten_flag() will increment i_unwritten counter, so once we mark end_io with EXT4_END_IO_UNWRITTEN we have to revert it back on error path. - add missed error checks to prevent counter leakage - ext4_end_io_nolock() will clear EXT4_END_IO_UNWRITTEN flag to signal that conversion finished. - add BUG_ON to ext4_free_end_io() to prevent similar leakage in future. Visible effect of this bug is that unaligned aio_stress may deadlock Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 21 ++++++++++++++------- fs/ext4/page-io.c | 6 +++++- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a1f56c3e773b..54a94426ef7b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3633,6 +3633,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { ret = ext4_split_unwritten_extents(handle, inode, map, path, flags); + if (ret <= 0) + goto out; /* * Flag the inode(non aio case) or end_io struct (aio case) * that this IO needs to conversion to written when IO is @@ -3878,6 +3880,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_allocation_request ar; ext4_io_end_t *io = ext4_inode_aio(inode); ext4_lblk_t cluster_offset; + int set_unwritten = 0; ext_debug("blocks %u/%u requested for inode %lu\n", map->m_lblk, map->m_len, inode->i_ino); @@ -4100,13 +4103,8 @@ got_allocated_blocks: * For non asycn direct IO case, flag the inode state * that we need to perform conversion when IO is done. */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { - if (io) - ext4_set_io_unwritten_flag(inode, io); - else - ext4_set_inode_state(inode, - EXT4_STATE_DIO_UNWRITTEN); - } + if ((flags & EXT4_GET_BLOCKS_PRE_IO)) + set_unwritten = 1; if (ext4_should_dioread_nolock(inode)) map->m_flags |= EXT4_MAP_UNINIT; } @@ -4118,6 +4116,15 @@ got_allocated_blocks: if (!err) err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + + if (!err && set_unwritten) { + if (io) + ext4_set_io_unwritten_flag(inode, io); + else + ext4_set_inode_state(inode, + EXT4_STATE_DIO_UNWRITTEN); + } + if (err && free_on_err) { int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index de77e31cc119..997002218228 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -71,6 +71,8 @@ void ext4_free_io_end(ext4_io_end_t *io) int i; BUG_ON(!io); + BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); + if (io->page) put_page(io->page); for (i = 0; i < io->num_io_pages; i++) @@ -94,6 +96,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io) ssize_t size = io->size; int ret = 0; + BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); + ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", io, inode->i_ino, io->list.next, io->list.prev); @@ -106,7 +110,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) "(inode %lu, offset %llu, size %zd, error %d)", inode->i_ino, offset, size, ret); } - + io->flag &= ~EXT4_IO_END_UNWRITTEN; if (io->iocb) aio_complete(io->iocb, io->result, 0); -- cgit v1.2.3 From 28a535f9a0df060569dcc786e5bc2e1de43d7dc7 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 29 Sep 2012 00:14:55 -0400 Subject: ext4: completed_io locking cleanup Current unwritten extent conversion state-machine is very fuzzy. - For unknown reason it performs conversion under i_mutex. What for? My diagnosis: We already protect extent tree with i_data_sem, truncate and punch_hole should wait for DIO, so the only data we have to protect is end_io->flags modification, but only flush_completed_IO and end_io_work modified this flags and we can serialize them via i_completed_io_lock. Currently all these games with mutex_trylock result in the following deadlock truncate: kworker: ext4_setattr ext4_end_io_work mutex_lock(i_mutex) inode_dio_wait(inode) ->BLOCK DEADLOCK<- mutex_trylock() inode_dio_done() #TEST_CASE1_BEGIN MNT=/mnt_scrach unlink $MNT/file fallocate -l $((1024*1024*1024)) $MNT/file aio-stress -I 100000 -O -s 100m -n -t 1 -c 10 -o 2 -o 3 $MNT/file sleep 2 truncate -s 0 $MNT/file #TEST_CASE1_END Or use 286's xfstests https://github.com/dmonakhov/xfstests/blob/devel/286 This patch makes state machine simple and clean: (1) xxx_end_io schedule final extent conversion simply by calling ext4_add_complete_io(), which append it to ei->i_completed_io_list NOTE1: because of (2A) work should be queued only if ->i_completed_io_list was empty, otherwise the work is scheduled already. (2) ext4_flush_completed_IO is responsible for handling all pending end_io from ei->i_completed_io_list Flushing sequence consists of following stages: A) LOCKED: Atomically drain completed_io_list to local_list B) Perform extents conversion C) LOCKED: move converted io's to to_free list for final deletion This logic depends on context which we was called from. D) Final end_io context destruction NOTE1: i_mutex is no longer required because end_io->flags modification is protected by ei->ext4_complete_io_lock Full list of changes: - Move all completion end_io related routines to page-io.c in order to improve logic locality - Move open coded logic from various xx_end_xx routines to ext4_add_complete_io() - remove EXT4_IO_END_FSYNC - Improve SMP scalability by removing useless i_mutex which does not protect io->flags anymore. - Reduce lock contention on i_completed_io_lock by optimizing list walk. - Rename ext4_end_io_nolock to end4_end_io and make it static - Check flush completion status to ext4_ext_punch_hole(). Because it is not good idea to punch blocks from corrupted inode. Changes since V3 (in request to Jan's comments): Fall back to active flush_completed_IO() approach in order to prevent performance issues with nolocked DIO reads. Changes since V2: Fix use-after-free caused by race truncate vs end_io_work Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +- fs/ext4/extents.c | 4 +- fs/ext4/fsync.c | 81 ------------------------- fs/ext4/indirect.c | 6 +- fs/ext4/inode.c | 25 +------- fs/ext4/page-io.c | 171 +++++++++++++++++++++++++++++++++++------------------ 6 files changed, 121 insertions(+), 169 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6ec9856065d6..edb049579420 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -186,7 +186,6 @@ struct mpage_da_data { #define EXT4_IO_END_ERROR 0x0002 #define EXT4_IO_END_QUEUED 0x0004 #define EXT4_IO_END_DIRECT 0x0008 -#define EXT4_IO_END_IN_FSYNC 0x0010 struct ext4_io_page { struct page *p_page; @@ -2418,11 +2417,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, /* page-io.c */ extern int __init ext4_init_pageio(void); +extern void ext4_add_complete_io(ext4_io_end_t *io_end); extern void ext4_exit_pageio(void); extern void ext4_ioend_wait(struct inode *); extern void ext4_free_io_end(ext4_io_end_t *io); extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); -extern int ext4_end_io_nolock(ext4_io_end_t *io); extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 54a94426ef7b..232077439aa8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4833,7 +4833,9 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) } /* finish any pending end_io work */ - ext4_flush_completed_IO(inode); + err = ext4_flush_completed_IO(inode); + if (err) + return err; credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, credits); diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 323eb15c2d94..460000868b8e 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -34,87 +34,6 @@ #include -static void dump_completed_IO(struct inode * inode) -{ -#ifdef EXT4FS_DEBUG - struct list_head *cur, *before, *after; - ext4_io_end_t *io, *io0, *io1; - unsigned long flags; - - if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ - ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); - return; - } - - ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); - spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); - list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ - cur = &io->list; - before = cur->prev; - io0 = container_of(before, ext4_io_end_t, list); - after = cur->next; - io1 = container_of(after, ext4_io_end_t, list); - - ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", - io, inode->i_ino, io0, io1); - } - spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); -#endif -} - -/* - * This function is called from ext4_sync_file(). - * - * When IO is completed, the work to convert unwritten extents to - * written is queued on workqueue but may not get immediately - * scheduled. When fsync is called, we need to ensure the - * conversion is complete before fsync returns. - * The inode keeps track of a list of pending/completed IO that - * might needs to do the conversion. This function walks through - * the list and convert the related unwritten extents for completed IO - * to written. - * The function return the number of pending IOs on success. - */ -int ext4_flush_completed_IO(struct inode *inode) -{ - ext4_io_end_t *io; - struct ext4_inode_info *ei = EXT4_I(inode); - unsigned long flags; - int ret = 0; - int ret2 = 0; - - dump_completed_IO(inode); - spin_lock_irqsave(&ei->i_completed_io_lock, flags); - while (!list_empty(&ei->i_completed_io_list)){ - io = list_entry(ei->i_completed_io_list.next, - ext4_io_end_t, list); - list_del_init(&io->list); - io->flag |= EXT4_IO_END_IN_FSYNC; - /* - * Calling ext4_end_io_nolock() to convert completed - * IO to written. - * - * When ext4_sync_file() is called, run_queue() may already - * about to flush the work corresponding to this io structure. - * It will be upset if it founds the io structure related - * to the work-to-be schedule is freed. - * - * Thus we need to keep the io structure still valid here after - * conversion finished. The io structure has a flag to - * avoid double converting from both fsync and background work - * queue work. - */ - spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); - ret = ext4_end_io_nolock(io); - if (ret < 0) - ret2 = ret; - spin_lock_irqsave(&ei->i_completed_io_lock, flags); - io->flag &= ~EXT4_IO_END_IN_FSYNC; - } - spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); - return (ret2 < 0) ? ret2 : 0; -} - /* * If we're not journaling and this is a just-created file, we have to * sync our parent directory (if it was freshly created) since diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 830e1b2bf145..61f13e57975e 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -807,11 +807,9 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, retry: if (rw == READ && ext4_should_dioread_nolock(inode)) { - if (unlikely(!list_empty(&ei->i_completed_io_list))) { - mutex_lock(&inode->i_mutex); + if (unlikely(!list_empty(&ei->i_completed_io_list))) ext4_flush_completed_IO(inode); - mutex_unlock(&inode->i_mutex); - } + ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a99588673566..09d0488e9a15 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2881,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; ext4_io_end_t *io_end = iocb->private; - struct workqueue_struct *wq; - unsigned long flags; - struct ext4_inode_info *ei; /* if not async direct IO or dio with 0 bytes write, just return */ if (!io_end || !size) @@ -2912,24 +2909,14 @@ out: io_end->iocb = iocb; io_end->result = ret; } - wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; - - /* Add the io_end to per-inode completed aio dio list*/ - ei = EXT4_I(io_end->inode); - spin_lock_irqsave(&ei->i_completed_io_lock, flags); - list_add_tail(&io_end->list, &ei->i_completed_io_list); - spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); - /* queue the work to convert unwritten extents to written */ - queue_work(wq, &io_end->work); + ext4_add_complete_io(io_end); } static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) { ext4_io_end_t *io_end = bh->b_private; - struct workqueue_struct *wq; struct inode *inode; - unsigned long flags; if (!test_clear_buffer_uninit(bh) || !io_end) goto out; @@ -2948,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) */ inode = io_end->inode; ext4_set_io_unwritten_flag(inode, io_end); - - /* Add the io_end to per-inode completed io list*/ - spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); - list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); - spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); - - wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; - /* queue the work to convert unwritten extents to written */ - queue_work(wq, &io_end->work); + ext4_add_complete_io(io_end); out: bh->b_private = NULL; bh->b_end_io = NULL; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 997002218228..5b24c407701b 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -71,6 +71,7 @@ void ext4_free_io_end(ext4_io_end_t *io) int i; BUG_ON(!io); + BUG_ON(!list_empty(&io->list)); BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); if (io->page) @@ -83,21 +84,14 @@ void ext4_free_io_end(ext4_io_end_t *io) kmem_cache_free(io_end_cachep, io); } -/* - * check a range of space and convert unwritten extents to written. - * - * Called with inode->i_mutex; we depend on this when we manipulate - * io->flag, since we could otherwise race with ext4_flush_completed_IO() - */ -int ext4_end_io_nolock(ext4_io_end_t *io) +/* check a range of space and convert unwritten extents to written. */ +static int ext4_end_io(ext4_io_end_t *io) { struct inode *inode = io->inode; loff_t offset = io->offset; ssize_t size = io->size; int ret = 0; - BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); - ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", io, inode->i_ino, io->list.next, io->list.prev); @@ -110,7 +104,6 @@ int ext4_end_io_nolock(ext4_io_end_t *io) "(inode %lu, offset %llu, size %zd, error %d)", inode->i_ino, offset, size, ret); } - io->flag &= ~EXT4_IO_END_UNWRITTEN; if (io->iocb) aio_complete(io->iocb, io->result, 0); @@ -122,51 +115,122 @@ int ext4_end_io_nolock(ext4_io_end_t *io) return ret; } -/* - * work on completed aio dio IO, to convert unwritten extents to extents - */ -static void ext4_end_io_work(struct work_struct *work) +static void dump_completed_IO(struct inode *inode) +{ +#ifdef EXT4FS_DEBUG + struct list_head *cur, *before, *after; + ext4_io_end_t *io, *io0, *io1; + unsigned long flags; + + if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { + ext4_debug("inode %lu completed_io list is empty\n", + inode->i_ino); + return; + } + + ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); + list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { + cur = &io->list; + before = cur->prev; + io0 = container_of(before, ext4_io_end_t, list); + after = cur->next; + io1 = container_of(after, ext4_io_end_t, list); + + ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", + io, inode->i_ino, io0, io1); + } +#endif +} + +/* Add the io_end to per-inode completed end_io list. */ +void ext4_add_complete_io(ext4_io_end_t *io_end) { - ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); - struct inode *inode = io->inode; - struct ext4_inode_info *ei = EXT4_I(inode); - unsigned long flags; + struct ext4_inode_info *ei = EXT4_I(io_end->inode); + struct workqueue_struct *wq; + unsigned long flags; + + BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); + wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; spin_lock_irqsave(&ei->i_completed_io_lock, flags); - if (io->flag & EXT4_IO_END_IN_FSYNC) - goto requeue; - if (list_empty(&io->list)) { - spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); - goto free; + if (list_empty(&ei->i_completed_io_list)) { + io_end->flag |= EXT4_IO_END_QUEUED; + queue_work(wq, &io_end->work); } + list_add_tail(&io_end->list, &ei->i_completed_io_list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); +} - if (!mutex_trylock(&inode->i_mutex)) { - bool was_queued; -requeue: - was_queued = !!(io->flag & EXT4_IO_END_QUEUED); - io->flag |= EXT4_IO_END_QUEUED; - spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); - /* - * Requeue the work instead of waiting so that the work - * items queued after this can be processed. - */ - queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); - /* - * To prevent the ext4-dio-unwritten thread from keeping - * requeueing end_io requests and occupying cpu for too long, - * yield the cpu if it sees an end_io request that has already - * been requeued. - */ - if (was_queued) - yield(); - return; +static int ext4_do_flush_completed_IO(struct inode *inode, + ext4_io_end_t *work_io) +{ + ext4_io_end_t *io; + struct list_head unwritten, complete, to_free; + unsigned long flags; + struct ext4_inode_info *ei = EXT4_I(inode); + int err, ret = 0; + + INIT_LIST_HEAD(&complete); + INIT_LIST_HEAD(&to_free); + + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + dump_completed_IO(inode); + list_replace_init(&ei->i_completed_io_list, &unwritten); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + + while (!list_empty(&unwritten)) { + io = list_entry(unwritten.next, ext4_io_end_t, list); + BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); + list_del_init(&io->list); + + err = ext4_end_io(io); + if (unlikely(!ret && err)) + ret = err; + + list_add_tail(&io->list, &complete); + } + /* It is important to update all flags for all end_io in one shot w/o + * dropping the lock.*/ + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + while (!list_empty(&complete)) { + io = list_entry(complete.next, ext4_io_end_t, list); + io->flag &= ~EXT4_IO_END_UNWRITTEN; + /* end_io context can not be destroyed now because it still + * used by queued worker. Worker thread will destroy it later */ + if (io->flag & EXT4_IO_END_QUEUED) + list_del_init(&io->list); + else + list_move(&io->list, &to_free); + } + /* If we are called from worker context, it is time to clear queued + * flag, and destroy it's end_io if it was converted already */ + if (work_io) { + work_io->flag &= ~EXT4_IO_END_QUEUED; + if (!(work_io->flag & EXT4_IO_END_UNWRITTEN)) + list_add_tail(&work_io->list, &to_free); } - list_del_init(&io->list); spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); - (void) ext4_end_io_nolock(io); - mutex_unlock(&inode->i_mutex); -free: - ext4_free_io_end(io); + + while (!list_empty(&to_free)) { + io = list_entry(to_free.next, ext4_io_end_t, list); + list_del_init(&io->list); + ext4_free_io_end(io); + } + return ret; +} + +/* + * work on completed aio dio IO, to convert unwritten extents to extents + */ +static void ext4_end_io_work(struct work_struct *work) +{ + ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); + ext4_do_flush_completed_IO(io->inode, io); +} + +int ext4_flush_completed_IO(struct inode *inode) +{ + return ext4_do_flush_completed_IO(inode, NULL); } ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) @@ -199,9 +263,7 @@ static void buffer_io_error(struct buffer_head *bh) static void ext4_end_bio(struct bio *bio, int error) { ext4_io_end_t *io_end = bio->bi_private; - struct workqueue_struct *wq; struct inode *inode; - unsigned long flags; int i; sector_t bi_sector = bio->bi_sector; @@ -259,14 +321,7 @@ static void ext4_end_bio(struct bio *bio, int error) return; } - /* Add the io_end to per-inode completed io list*/ - spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); - list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); - spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); - - wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; - /* queue the work to convert unwritten extents to written */ - queue_work(wq, &io_end->work); + ext4_add_complete_io(io_end); } void ext4_io_submit(struct ext4_io_submit *io) -- cgit v1.2.3 From 17335dcc471199717839b2fa3492ca36f70f1168 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 29 Sep 2012 00:41:21 -0400 Subject: ext4: serialize dio nonlocked reads with defrag workers Inode's block defrag and ext4_change_inode_journal_flag() may affect nonlocked DIO reads result, so proper synchronization required. - Add missed inode_dio_wait() calls where appropriate - Check inode state under extra i_dio_count reference. Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 17 +++++++++++++++++ fs/ext4/indirect.c | 14 ++++++++++++++ fs/ext4/inode.c | 5 +++++ fs/ext4/move_extent.c | 8 ++++++++ 4 files changed, 44 insertions(+) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index edb049579420..1be2b4472a83 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1358,6 +1358,8 @@ enum { EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ EXT4_STATE_NEWENTRY, /* File just added to dir */ EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ + EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read + nolocking */ }; #define EXT4_INODE_BIT_FNS(name, field, offset) \ @@ -2469,6 +2471,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); } +/* + * Disable DIO read nolock optimization, so new dioreaders will be forced + * to grab i_mutex + */ +static inline void ext4_inode_block_unlocked_dio(struct inode *inode) +{ + ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); + smp_mb(); +} +static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) +{ + smp_mb(); + ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); +} + #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /* For ioend & aio unwritten conversion wait queues */ diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 61f13e57975e..8d849dae8428 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -810,11 +810,25 @@ retry: if (unlikely(!list_empty(&ei->i_completed_io_list))) ext4_flush_completed_IO(inode); + /* + * Nolock dioread optimization may be dynamically disabled + * via ext4_inode_block_unlocked_dio(). Check inode's state + * while holding extra i_dio_count ref. + */ + atomic_inc(&inode->i_dio_count); + smp_mb(); + if (unlikely(ext4_test_inode_state(inode, + EXT4_STATE_DIOREAD_LOCK))) { + inode_dio_done(inode); + goto locked; + } ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block, NULL, NULL, 0); + inode_dio_done(inode); } else { +locked: ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, ext4_get_block); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 09d0488e9a15..bdd399bc2abf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4720,6 +4720,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return err; } + /* Wait for all existing dio workers */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + jbd2_journal_lock_updates(journal); /* @@ -4739,6 +4743,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); + ext4_inode_resume_unlocked_dio(inode); /* Finally we can mark the inode as dirty. */ diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 8076b96b5299..292daeeed455 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1323,6 +1323,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, /* Protect orig and donor inodes against a truncate */ mext_inode_double_lock(orig_inode, donor_inode); + /* Wait for all existing dio workers */ + ext4_inode_block_unlocked_dio(orig_inode); + ext4_inode_block_unlocked_dio(donor_inode); + inode_dio_wait(orig_inode); + inode_dio_wait(donor_inode); + /* Protect extent tree against block allocations via delalloc */ double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ @@ -1521,6 +1527,8 @@ out: kfree(holecheck_path); } double_up_write_data_sem(orig_inode, donor_inode); + ext4_inode_resume_unlocked_dio(orig_inode); + ext4_inode_resume_unlocked_dio(donor_inode); mext_inode_double_unlock(orig_inode, donor_inode); return ret; -- cgit v1.2.3 From 1c9114f9c0f10f58dd7e568a7152025af47b27e5 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 29 Sep 2012 00:55:23 -0400 Subject: ext4: serialize unlocked dio reads with truncate Current serialization will works only for DIO which holds i_mutex, but nonlocked DIO following race is possible: dio_nolock_read_task truncate_task ->ext4_setattr() ->inode_dio_wait() ->ext4_ext_direct_IO ->ext4_ind_direct_IO ->__blockdev_direct_IO ->ext4_get_block ->truncate_setsize() ->ext4_truncate() #alloc truncated blocks #to other inode ->submit_io() #INFORMATION LEAK In order to serialize with unlocked DIO reads we have to rearrange wait sequence 1) update i_size first 2) if i_size about to be reduced wait for outstanding DIO requests 3) and only after that truncate inode blocks Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bdd399bc2abf..0bfc63331467 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4283,7 +4283,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - inode_dio_wait(inode); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -4332,8 +4331,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != i_size_read(inode)) + if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); + /* Inode size will be reduced, wait for dio in flight */ + if (orphan) + inode_dio_wait(inode); + } ext4_truncate(inode); } -- cgit v1.2.3 From 1b65007e9870e0021397b548e8cd6bbc584f9152 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 29 Sep 2012 00:56:15 -0400 Subject: ext4: endless truncate due to nonlocked dio readers If we have enough aggressive DIO readers, truncate and other dio waiters will wait forever inside inode_dio_wait(). It is reasonable to disable nonlock DIO read optimization during truncate. Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0bfc63331467..05ab70dd5c64 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4333,9 +4333,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); - /* Inode size will be reduced, wait for dio in flight */ - if (orphan) + /* Inode size will be reduced, wait for dio in flight. + * Temporarily disable dioread_nolock to prevent + * livelock. */ + if (orphan) { + ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + ext4_inode_resume_unlocked_dio(inode); + } } ext4_truncate(inode); } -- cgit v1.2.3 From 1f555cfa29e8f787d675e8390f88ce517a37271a Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 29 Sep 2012 00:58:26 -0400 Subject: ext4: serialize truncate with owerwrite DIO workers Jan Kara have spotted interesting issue: There are potential data corruption issue with direct IO overwrites racing with truncate: Like: dio write truncate_task ->ext4_ext_direct_IO ->overwrite == 1 ->down_read(&EXT4_I(inode)->i_data_sem); ->mutex_unlock(&inode->i_mutex); ->ext4_setattr() ->inode_dio_wait() ->truncate_setsize() ->ext4_truncate() ->down_write(&EXT4_I(inode)->i_data_sem); ->__blockdev_direct_IO ->ext4_get_block ->submit_io() ->up_read(&EXT4_I(inode)->i_data_sem); # truncate data blocks, allocate them to # other inode - bad stuff happens because # dio is still in flight. In order to serialize with truncate dio worker should grab extra i_dio_count reference before drop i_mutex. Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 05ab70dd5c64..09308ad0f314 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3010,6 +3010,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, overwrite = *((int *)iocb->private); if (overwrite) { + atomic_inc(&inode->i_dio_count); down_read(&EXT4_I(inode)->i_data_sem); mutex_unlock(&inode->i_mutex); } @@ -3107,6 +3108,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, retake_lock: /* take i_mutex locking again if we do a ovewrite dio */ if (overwrite) { + inode_dio_done(inode); up_read(&EXT4_I(inode)->i_data_sem); mutex_lock(&inode->i_mutex); } -- cgit v1.2.3 From 8110e16d42d587997bcaee0c864179e6d93603fe Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 17 Sep 2012 22:23:30 +0200 Subject: vfs: dcache: fix deadlock in tree traversal IBM reported a deadlock in select_parent(). This was found to be caused by taking rename_lock when already locked when restarting the tree traversal. There are two cases when the traversal needs to be restarted: 1) concurrent d_move(); this can only happen when not already locked, since taking rename_lock protects against concurrent d_move(). 2) racing with final d_put() on child just at the moment of ascending to parent; rename_lock doesn't protect against this rare race, so it can happen when already locked. Because of case 2, we need to be able to handle restarting the traversal when rename_lock is already held. This patch fixes all three callers of try_to_ascend(). IBM reported that the deadlock is gone with this patch. [ I rewrote the patch to be smaller and just do the "goto again" if the lock was already held, but credit goes to Miklos for the real work. - Linus ] Signed-off-by: Miklos Szeredi Cc: Al Viro Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/dcache.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 0364af2311f4..693f95bf1cae 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1134,6 +1134,8 @@ positive: return 1; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1236,6 +1238,8 @@ out: rename_retry: if (found) return found; + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -3035,6 +3039,8 @@ resume: return; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; -- cgit v1.2.3 From 02d262dffcf4c74e5c4612ee736bdb94f18ed5b9 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 30 Sep 2012 23:03:42 -0400 Subject: ext4: punch_hole should wait for DIO writers punch_hole is the place where we have to wait for all existing writers (writeback, aio, dio), but currently we simply flush pended end_io request which is not sufficient. Other issue is that punch_hole performed w/o i_mutex held which obviously result in dangerous data corruption due to write-after-free. This patch performs following changes: - Guard punch_hole with i_mutex - Recheck inode flags under i_mutex - Block all new dio readers in order to prevent information leak caused by read-after-free pattern. - punch_hole now wait for all writers in flight NOTE: XXX write-after-free race is still possible because new dirty pages may appear due to mmap(), and currently there is no easy way to stop writeback while punch_hole is in progress. [ Fixed error return from ext4_ext_punch_hole() to make sure that we release i_mutex before returning EPERM or ETXTBUSY -- Ted ] Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 53 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 232077439aa8..5920e75fc05f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4794,9 +4794,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) loff_t first_page_offset, last_page_offset; int credits, err = 0; + /* + * Write out all dirty pages to avoid race conditions + * Then release them. + */ + if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { + err = filemap_write_and_wait_range(mapping, + offset, offset + length - 1); + + if (err) + return err; + } + + mutex_lock(&inode->i_mutex); + /* It's not possible punch hole on append only file */ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { + err = -EPERM; + goto out_mutex; + } + if (IS_SWAPFILE(inode)) { + err = -ETXTBSY; + goto out_mutex; + } + /* No need to punch hole beyond i_size */ if (offset >= inode->i_size) - return 0; + goto out_mutex; /* * If the hole extends beyond i_size, set the hole @@ -4814,33 +4837,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) first_page_offset = first_page << PAGE_CACHE_SHIFT; last_page_offset = last_page << PAGE_CACHE_SHIFT; - /* - * Write out all dirty pages to avoid race conditions - * Then release them. - */ - if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { - err = filemap_write_and_wait_range(mapping, - offset, offset + length - 1); - - if (err) - return err; - } - /* Now release the pages */ if (last_page_offset > first_page_offset) { truncate_pagecache_range(inode, first_page_offset, last_page_offset - 1); } - /* finish any pending end_io work */ + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); err = ext4_flush_completed_IO(inode); if (err) - return err; + goto out_dio; credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out_dio; + } /* @@ -4930,6 +4945,10 @@ out: inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); +out_dio: + ext4_inode_resume_unlocked_dio(inode); +out_mutex: + mutex_unlock(&inode->i_mutex); return err; } int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, -- cgit v1.2.3 From 6f2080e64487b9963f9c6ff8a252e1abce98f2d4 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 30 Sep 2012 23:03:50 -0400 Subject: ext4: fix ext_remove_space for punch_hole case Inode is allowed to have empty leaf only if it this is blockless inode. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5920e75fc05f..c1fcf489e056 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2589,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, struct ext4_ext_path *path = NULL; ext4_fsblk_t partial_cluster = 0; handle_t *handle; - int i = 0, err; + int i = 0, err = 0; ext_debug("truncate since %u to %u\n", start, end); @@ -2621,12 +2621,16 @@ again: return PTR_ERR(path); } depth = ext_depth(inode); + /* Leaf not may not exist only if inode has no blocks at all */ ex = path[depth].p_ext; if (!ex) { - ext4_ext_drop_refs(path); - kfree(path); - path = NULL; - goto cont; + if (depth) { + EXT4_ERROR_INODE(inode, + "path[%d].p_hdr == NULL", + depth); + err = -EIO; + } + goto out; } ee_block = le32_to_cpu(ex->ee_block); @@ -2658,8 +2662,6 @@ again: goto out; } } -cont: - /* * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise. -- cgit v1.2.3 From 041bbb6d369811e948ae01f3d00414264076be35 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Sep 2012 23:04:56 -0400 Subject: ext4: fix mtime update in nodelalloc mode Commits 5e8830dc85d0 and 41c4d25f78c0 introduced a regression into v3.6-rc1 for ext4 in nodealloc mode, such that mtime updates would not take place for files modified via mmap if the page was already in the page cache. This would also affect ext3 file systems mounted using the ext4 file system driver. The problem was that ext4_page_mkwrite() had a shortcut which would avoid calling __block_page_mkwrite() under some circumstances, and the above two commit transferred the responsibility of calling file_update_time() to __block_page_mkwrite --- which woudln't get called in some circumstances. Since __block_page_mkwrite() only has three callers, block_page_mkwrite(), ext4_page_mkwrite, and nilfs_page_mkwrite(), the best way to solve this is to move the responsibility for calling file_update_time() to its caller. This problem was found via xfstests #215 with a file system mounted with -o nodelalloc. Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara Cc: KONISHI Ryusuke Cc: stable@vger.kernel.org --- fs/buffer.c | 13 +++++++------ fs/ext4/inode.c | 1 + fs/nilfs2/file.c | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 9f6d2e41281d..1fe3968357a9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2318,12 +2318,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, loff_t size; int ret; - /* - * Update file times before taking page lock. We may end up failing the - * fault so this update may be superfluous but who really cares... - */ - file_update_time(vma->vm_file); - lock_page(page); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || @@ -2361,6 +2355,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; sb_start_pagefault(sb); + + /* + * Update file times before taking page lock. We may end up failing the + * fault so this update may be superfluous but who really cares... + */ + file_update_time(vma->vm_file); + ret = __block_page_mkwrite(vma, vmf, get_block); sb_end_pagefault(sb); return block_page_mkwrite_return(ret); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 09308ad0f314..f18e786e87b0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4788,6 +4788,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) int retries = 0; sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a4d56ac02e6c..5b387a4c293e 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -116,6 +116,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (unlikely(ret)) goto out; + file_update_time(vma->vm_file); ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); if (ret) { nilfs_transaction_abort(inode->i_sb); -- cgit v1.2.3 From e4aa25e7801163df058f62c617b859e9d3d4b148 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 1 Oct 2012 12:26:22 -0500 Subject: [CIFS] Fix SMB2 negotiation support to select only one dialect (based on vers=) Based on whether the user (on mount command) chooses: vers=3.0 (for smb3.0 support) vers=2.1 (for smb2.1 support) or (with subsequent patch, which will allow SMB2 support) vers=2.0 (for original smb2.02 dialect support) send only one dialect at a time during negotiate (we had been sending a list). Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 9 +++++++++ fs/cifs/connect.c | 5 +++++ fs/cifs/smb2ops.c | 19 +++++++++++++++++++ fs/cifs/smb2pdu.c | 40 ++++++++++++---------------------------- fs/cifs/smb2pdu.h | 12 +++++++++++- 5 files changed, 56 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f6f40635abca..f5af2527fc69 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -179,6 +179,7 @@ struct smb_rqst { enum smb_version { Smb_1 = 1, Smb_21, + Smb_30, }; struct mid_q_entry; @@ -372,6 +373,8 @@ struct smb_version_operations { struct smb_version_values { char *version_string; + __u16 protocol_id; + __u32 req_capabilities; __u32 large_lock_type; __u32 exclusive_lock_type; __u32 shared_lock_type; @@ -1496,7 +1499,13 @@ extern mempool_t *cifs_mid_poolp; #define SMB1_VERSION_STRING "1.0" extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; +#define SMB20_VERSION_STRING "2.0" +/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */ +extern struct smb_version_values smb20_values; #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; +#define SMB30_VERSION_STRING "3.0" +/*extern struct smb_version_operations smb30_operations; */ /* not needed yet */ +extern struct smb_version_values smb30_values; #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a792282f02f7..2fdbe08a7a23 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -272,6 +272,7 @@ static const match_table_t cifs_cacheflavor_tokens = { static const match_table_t cifs_smb_version_tokens = { { Smb_1, SMB1_VERSION_STRING }, { Smb_21, SMB21_VERSION_STRING }, + { Smb_30, SMB30_VERSION_STRING }, }; static int ip_connect(struct TCP_Server_Info *server); @@ -1074,6 +1075,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) vol->ops = &smb21_operations; vol->vals = &smb21_values; break; + case Smb_30: + vol->ops = &smb21_operations; /* currently identical with 2.1 */ + vol->vals = &smb30_values; + break; #endif default: cERROR(1, "Unknown vers= option specified: %s", value); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 1570cbea4a49..4d9dbe0b7385 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -645,6 +645,25 @@ struct smb_version_operations smb21_operations = { struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, + .protocol_id = SMB21_PROT_ID, + .req_capabilities = 0, /* MBZ on negotiate req until SMB3 dialect */ + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, +}; + +struct smb_version_values smb30_values = { + .version_string = SMB30_VERSION_STRING, + .protocol_id = SMB30_PROT_ID, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5ad88b4b9990..cf33622cdac8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1,7 +1,7 @@ /* * fs/cifs/smb2pdu.c * - * Copyright (C) International Business Machines Corp., 2009, 2011 + * Copyright (C) International Business Machines Corp., 2009, 2012 * Etersoft, 2012 * Author(s): Steve French (sfrench@us.ibm.com) * Pavel Shilovsky (pshilovsky@samba.org) 2012 @@ -304,24 +304,6 @@ free_rsp_buf(int resp_buftype, void *rsp) cifs_buf_release(rsp); } -#define SMB2_NUM_PROT 2 - -#define SMB2_PROT 0 -#define SMB21_PROT 1 -#define BAD_PROT 0xFFFF - -#define SMB2_PROT_ID 0x0202 -#define SMB21_PROT_ID 0x0210 -#define BAD_PROT_ID 0xFFFF - -static struct { - int index; - __le16 name; -} smb2protocols[] = { - {SMB2_PROT, cpu_to_le16(SMB2_PROT_ID)}, - {SMB21_PROT, cpu_to_le16(SMB21_PROT_ID)}, - {BAD_PROT, cpu_to_le16(BAD_PROT_ID)} -}; /* * @@ -348,7 +330,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) int resp_buftype; struct TCP_Server_Info *server; unsigned int sec_flags; - u16 i; u16 temp = 0; int blob_offset, blob_length; char *security_blob; @@ -377,11 +358,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->hdr.SessionId = 0; - for (i = 0; i < SMB2_NUM_PROT; i++) - req->Dialects[i] = smb2protocols[i].name; + req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); - req->DialectCount = cpu_to_le16(i); - inc_rfc1001_len(req, i * 2); + req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */ + inc_rfc1001_len(req, 2); /* only one of SMB2 signing flags may be set in SMB2 request */ if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) @@ -391,7 +371,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->SecurityMode = cpu_to_le16(temp); - req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); + req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); @@ -411,10 +391,14 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) cFYI(1, "mode 0x%x", rsp->SecurityMode); - if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name) + /* BB we may eventually want to match the negotiated vs. requested + dialect, even though we are only requesting one at a time */ + if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) + cFYI(1, "negotiated smb2.0 dialect"); + else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) cFYI(1, "negotiated smb2.1 dialect"); - else if (rsp->DialectRevision == smb2protocols[SMB2_PROT].name) - cFYI(1, "negotiated smb2 dialect"); + else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) + cFYI(1, "negotiated smb3.0 dialect"); else { cERROR(1, "Illegal dialect returned by server %d", le16_to_cpu(rsp->DialectRevision)); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index da099225b1a9..4cb4ced258cb 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -163,9 +163,15 @@ struct smb2_negotiate_req { __le32 Capabilities; __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE]; __le64 ClientStartTime; /* MBZ */ - __le16 Dialects[2]; /* variable length */ + __le16 Dialects[1]; /* One dialect (vers=) at a time for now */ } __packed; +/* Dialects */ +#define SMB20_PROT_ID 0x0202 +#define SMB21_PROT_ID 0x0210 +#define SMB30_PROT_ID 0x0300 +#define BAD_PROT_ID 0xFFFF + /* SecurityMode flags */ #define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 #define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 @@ -173,6 +179,10 @@ struct smb2_negotiate_req { #define SMB2_GLOBAL_CAP_DFS 0x00000001 #define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ #define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ +#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */ /* Internal types */ #define SMB2_NT_FIND 0x00100000 #define SMB2_LARGE_FILES 0x00200000 -- cgit v1.2.3 From 1d4ab9077681b7cce60ff46e3a42fe2dafa0b83d Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 1 Oct 2012 12:48:03 -0500 Subject: [CIFS] Fix indentation of fs/cifs/Kconfig entries make menuconfig for cifs shows multiple entries toward the end of the list with the incorrect indentation (probably a bug in Kconfig parsing of items that are dependant on the module (cifs=m instead of just CONFIG_CIFS). This patch fixes the indentation of all but the last entry (CIFS_ACL) which I don't know how to fix. It also clarifies wording in two places Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/Kconfig | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 253033ec0904..2075ddfffa73 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -15,8 +15,8 @@ config CIFS (CIFS) protocol which is the successor to the Server Message Block (SMB) protocol, the native file sharing mechanism for most early PC operating systems. The CIFS protocol is fully supported by - file servers such as Windows 2000 (including Windows 2003, NT 4 - and Windows XP) as well by Samba (which provides excellent CIFS + file servers such as Windows 2000 (including Windows 2003, Windows 2008, + NT 4 and Windows XP) as well by Samba (which provides excellent CIFS server support for Linux and many other operating systems). Limited support for OS/2 and Windows ME and similar servers is provided as well. @@ -115,6 +115,13 @@ config CIFS_POSIX (such as Samba 3.10 and later) which can negotiate CIFS POSIX ACL support. If unsure, say N. +config CIFS_ACL + bool "Provide CIFS ACL support" + depends on CIFS_XATTR && KEYS + help + Allows fetching CIFS/NTFS ACL from the server. The DACL blob + is handed over to the application/caller. + config CIFS_DEBUG2 bool "Enable additional CIFS debugging routines" depends on CIFS @@ -139,21 +146,6 @@ config CIFS_DFS_UPCALL IP addresses) which is needed for implicit mounts of DFS junction points. If unsure, say N. -config CIFS_FSCACHE - bool "Provide CIFS client caching support" - depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y - help - Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data - to be cached locally on disk through the general filesystem cache - manager. If unsure, say N. - -config CIFS_ACL - bool "Provide CIFS ACL support" - depends on CIFS_XATTR && KEYS - help - Allows to fetch CIFS/NTFS ACL from the server. The DACL blob - is handed over to the application/caller. - config CIFS_NFSD_EXPORT bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" depends on CIFS && EXPERIMENTAL && BROKEN @@ -162,7 +154,7 @@ config CIFS_NFSD_EXPORT config CIFS_SMB2 bool "SMB2 network file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL && INET + depends on CIFS && EXPERIMENTAL && INET select NLS select KEYS select FSCACHE @@ -179,3 +171,12 @@ config CIFS_SMB2 (compared to cifs) due to protocol improvements. Unless you are a developer or tester, say N. + +config CIFS_FSCACHE + bool "Provide CIFS client caching support" + depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y + help + Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data + to be cached locally on disk through the general filesystem cache + manager. If unsure, say N. + -- cgit v1.2.3 From c98f533c9497e285109a047bfb955d683f33f7e4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 9 Aug 2012 10:33:26 -0700 Subject: ceph: let path portion of mount "device" be optional A recent change to /sbin/mountall causes any trailing '/' character in the "device" (or fs_spec) field in /etc/fstab to be stripped. As a result, an entry for a ceph mount that intends to mount the root of the name space ends up with now path portion, and the ceph mount option processing code rejects this. That is, an entry in /etc/fstab like: cephserver:port:/ /mnt ceph defaults 0 0 provides to the ceph code just "cephserver:port:" as the "device," and that gets rejected. Although this is a bug in /sbin/mountall, we can have the ceph mount code support an empty/nonexistent path, interpreting it to mean the root of the name space. RFC 5952 offers recommendations for how to express IPv6 addresses, and recommends the usage found in RFC 3986 (which specifies the format for URI's) for representing both IPv4 and IPv6 addresses that include port numbers. (See in particular the definition of "authority" found in the Appendix of RFC 3986.) According to those standards, no host specification will ever contain a '/' character. As a result, it is sufficient to scan a provided "device" from an /etc/fstab entry for the first '/' character, and if it's found, treat that as the beginning of the path. If no '/' character is present, we can treat the entire string as the monitor host specification(s), and assume the path to be the root of the name space. We'll still require a ':' to separate the host portion from the (possibly empty) path portion. This means that we can more formally define how ceph will interpret the "device" it's provided when processing a mount request: "device" will look like: [,...]:[] where is [:] is optional, but if present must begin with '/' This addresses http://tracker.newdream.net/issues/2919 Signed-off-by: Alex Elder Reviewed-by: Dan Mick --- fs/ceph/super.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..2f586b0e5e0f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -307,7 +307,10 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, { struct ceph_mount_options *fsopt; const char *dev_name_end; - int err = -ENOMEM; + int err; + + if (!dev_name || !*dev_name) + return -EINVAL; fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); if (!fsopt) @@ -328,21 +331,33 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; fsopt->congestion_kb = default_congestion_kb(); - /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ + /* + * Distinguish the server list from the path in "dev_name". + * Internally we do not include the leading '/' in the path. + * + * "dev_name" will look like: + * [,...]:[] + * where + * is [:] + * is optional, but if present must begin with '/' + */ + dev_name_end = strchr(dev_name, '/'); + if (dev_name_end) { + /* skip over leading '/' for path */ + *path = dev_name_end + 1; + } else { + /* path is empty */ + dev_name_end = dev_name + strlen(dev_name); + *path = dev_name_end; + } err = -EINVAL; - if (!dev_name) - goto out; - *path = strstr(dev_name, ":/"); - if (*path == NULL) { - pr_err("device name is missing path (no :/ in %s)\n", + dev_name_end--; /* back up to ':' separator */ + if (*dev_name_end != ':') { + pr_err("device name is missing path (no : separator in %s)\n", dev_name); goto out; } - dev_name_end = *path; dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); - - /* path on server */ - *path += 2; dout("server path '%s'\n", *path); *popt = ceph_parse_options(options, dev_name, dev_name_end, -- cgit v1.2.3 From 3e8f43a089f06279c5f76a9ccd42578eebf7bfa5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 20 Sep 2012 17:42:25 +0800 Subject: ceph: Fix oops when handling mdsmap that decreases max_mds When i >= newmap->m_max_mds, ceph_mdsmap_get_addr(newmap, i) return NULL. Passing NULL to memcmp() triggers oops. Signed-off-by: Yan, Zheng Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a5a735422aa7..1bcf712655d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2625,7 +2625,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", session_state_name(s->s_state)); - if (memcmp(ceph_mdsmap_get_addr(oldmap, i), + if (i >= newmap->m_max_mds || + memcmp(ceph_mdsmap_get_addr(oldmap, i), ceph_mdsmap_get_addr(newmap, i), sizeof(struct ceph_entity_addr))) { if (s->s_state == CEPH_MDS_SESSION_OPENING) { -- cgit v1.2.3 From b905a7f8b7a61c192927d0324f2ea6c998f451ba Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 28 Sep 2012 12:59:16 +0800 Subject: ceph: convert to use le32_add_cpu() Convert cpu_to_le32(le32_to_cpu(E1) + E2) to use le32_add_cpu(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 620daad201db..3251e9cc6401 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1005,7 +1005,7 @@ static void __queue_cap_release(struct ceph_mds_session *session, BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); head = msg->front.iov_base; - head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); + le32_add_cpu(&head->num, 1); item = msg->front.iov_base + msg->front.iov_len; item->ino = cpu_to_le64(ino); item->cap_id = cpu_to_le64(cap_id); -- cgit v1.2.3 From 6816282dab3a72efe8c0d182c1bc2960d87f4322 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:01:02 -0700 Subject: ceph: propagate layout error on osd request creation If we are creating an osd request and get an invalid layout, return an EINVAL to the caller. We switch up the return to have an error code instead of NULL implying -ENOMEM. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/addr.c | 8 ++++---- fs/ceph/file.c | 4 ++-- net/ceph/osd_client.c | 15 +++++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 452e71a1b753..4469b63c9b7b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -308,8 +308,8 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, NULL, false, 1, 0); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* build page vector */ nr_pages = len >> PAGE_CACHE_SHIFT; @@ -832,8 +832,8 @@ get_more_pages: ci->i_truncate_size, &inode->i_mtime, true, 1, 0); - if (!req) { - rc = -ENOMEM; + if (IS_ERR(req)) { + rc = PTR_ERR(req); unlock_page(page); break; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ecebbc09bfc7..5840d2aaed15 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -536,8 +536,8 @@ more: do_sync, ci->i_truncate_seq, ci->i_truncate_size, &mtime, false, 2, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); if (file->f_flags & O_DIRECT) { pages = ceph_get_direct_page_vector(data, num_pages, false); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f7b56e23988c..ccbdfbba9e53 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -464,6 +464,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, { struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; + int r; ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; @@ -482,10 +483,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, use_mempool, GFP_NOFS, NULL, NULL); if (!req) - return NULL; + return ERR_PTR(-ENOMEM); /* calculate max write size */ - calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(osdc, vino, layout, off, plen, req, ops); + if (r < 0) + return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ /* in case it differs from natural (file) alignment that @@ -1928,8 +1931,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, false, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short read due to an object boundary */ req->r_pages = pages; @@ -1971,8 +1974,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, snapc, do_sync, truncate_seq, truncate_size, mtime, nofail, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short write due to an object boundary */ req->r_pages = pages; -- cgit v1.2.3 From 10c28d937e2cca577c2d804106b50dd0562fb062 Mon Sep 17 00:00:00 2001 From: Alex Kelly Date: Wed, 26 Sep 2012 21:52:08 -0400 Subject: coredump: move core dump functionality into its own file This prepares for making core dump functionality optional. The variable "suid_dumpable" and associated functions are left in fs/exec.c because they're used elsewhere, such as in ptrace. Signed-off-by: Alex Kelly Reviewed-by: Josh Triplett Acked-by: Serge Hallyn Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/coredump.c | 686 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/exec.c | 645 +---------------------------------------------- include/linux/sched.h | 1 + 4 files changed, 689 insertions(+), 645 deletions(-) create mode 100644 fs/coredump.c (limited to 'fs') diff --git a/fs/Makefile b/fs/Makefile index 2fb977934673..8938f8250320 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o + stack.o fs_struct.o statfs.o coredump.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o diff --git a/fs/coredump.c b/fs/coredump.c new file mode 100644 index 000000000000..f045bbad6822 --- /dev/null +++ b/fs/coredump.c @@ -0,0 +1,686 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include "internal.h" + +#include + +int core_uses_pid; +char core_pattern[CORENAME_MAX_SIZE] = "core"; +unsigned int core_pipe_limit; + +struct core_name { + char *corename; + int used, size; +}; +static atomic_t call_count = ATOMIC_INIT(1); + +/* The maximal length of core_pattern is also specified in sysctl.c */ + +static int expand_corename(struct core_name *cn) +{ + char *old_corename = cn->corename; + + cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); + cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); + + if (!cn->corename) { + kfree(old_corename); + return -ENOMEM; + } + + return 0; +} + +static int cn_printf(struct core_name *cn, const char *fmt, ...) +{ + char *cur; + int need; + int ret; + va_list arg; + + va_start(arg, fmt); + need = vsnprintf(NULL, 0, fmt, arg); + va_end(arg); + + if (likely(need < cn->size - cn->used - 1)) + goto out_printf; + + ret = expand_corename(cn); + if (ret) + goto expand_fail; + +out_printf: + cur = cn->corename + cn->used; + va_start(arg, fmt); + vsnprintf(cur, need + 1, fmt, arg); + va_end(arg); + cn->used += need; + return 0; + +expand_fail: + return ret; +} + +static void cn_escape(char *str) +{ + for (; *str; str++) + if (*str == '/') + *str = '!'; +} + +static int cn_print_exe_file(struct core_name *cn) +{ + struct file *exe_file; + char *pathbuf, *path; + int ret; + + exe_file = get_mm_exe_file(current->mm); + if (!exe_file) { + char *commstart = cn->corename + cn->used; + ret = cn_printf(cn, "%s (path unknown)", current->comm); + cn_escape(commstart); + return ret; + } + + pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); + if (!pathbuf) { + ret = -ENOMEM; + goto put_exe_file; + } + + path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); + if (IS_ERR(path)) { + ret = PTR_ERR(path); + goto free_buf; + } + + cn_escape(path); + + ret = cn_printf(cn, "%s", path); + +free_buf: + kfree(pathbuf); +put_exe_file: + fput(exe_file); + return ret; +} + +/* format_corename will inspect the pattern parameter, and output a + * name into corename, which must have space for at least + * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. + */ +static int format_corename(struct core_name *cn, long signr) +{ + const struct cred *cred = current_cred(); + const char *pat_ptr = core_pattern; + int ispipe = (*pat_ptr == '|'); + int pid_in_pattern = 0; + int err = 0; + + cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); + cn->corename = kmalloc(cn->size, GFP_KERNEL); + cn->used = 0; + + if (!cn->corename) + return -ENOMEM; + + /* Repeat as long as we have more pattern to process and more output + space */ + while (*pat_ptr) { + if (*pat_ptr != '%') { + if (*pat_ptr == 0) + goto out; + err = cn_printf(cn, "%c", *pat_ptr++); + } else { + switch (*++pat_ptr) { + /* single % at the end, drop that */ + case 0: + goto out; + /* Double percent, output one percent */ + case '%': + err = cn_printf(cn, "%c", '%'); + break; + /* pid */ + case 'p': + pid_in_pattern = 1; + err = cn_printf(cn, "%d", + task_tgid_vnr(current)); + break; + /* uid */ + case 'u': + err = cn_printf(cn, "%d", cred->uid); + break; + /* gid */ + case 'g': + err = cn_printf(cn, "%d", cred->gid); + break; + /* signal that caused the coredump */ + case 's': + err = cn_printf(cn, "%ld", signr); + break; + /* UNIX time of coredump */ + case 't': { + struct timeval tv; + do_gettimeofday(&tv); + err = cn_printf(cn, "%lu", tv.tv_sec); + break; + } + /* hostname */ + case 'h': { + char *namestart = cn->corename + cn->used; + down_read(&uts_sem); + err = cn_printf(cn, "%s", + utsname()->nodename); + up_read(&uts_sem); + cn_escape(namestart); + break; + } + /* executable */ + case 'e': { + char *commstart = cn->corename + cn->used; + err = cn_printf(cn, "%s", current->comm); + cn_escape(commstart); + break; + } + case 'E': + err = cn_print_exe_file(cn); + break; + /* core limit size */ + case 'c': + err = cn_printf(cn, "%lu", + rlimit(RLIMIT_CORE)); + break; + default: + break; + } + ++pat_ptr; + } + + if (err) + return err; + } + + /* Backward compatibility with core_uses_pid: + * + * If core_pattern does not include a %p (as is the default) + * and core_uses_pid is set, then .%pid will be appended to + * the filename. Do not do this for piped commands. */ + if (!ispipe && !pid_in_pattern && core_uses_pid) { + err = cn_printf(cn, ".%d", task_tgid_vnr(current)); + if (err) + return err; + } +out: + return ispipe; +} + +static int zap_process(struct task_struct *start, int exit_code) +{ + struct task_struct *t; + int nr = 0; + + start->signal->flags = SIGNAL_GROUP_EXIT; + start->signal->group_exit_code = exit_code; + start->signal->group_stop_count = 0; + + t = start; + do { + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); + if (t != current && t->mm) { + sigaddset(&t->pending.signal, SIGKILL); + signal_wake_up(t, 1); + nr++; + } + } while_each_thread(start, t); + + return nr; +} + +static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, + struct core_state *core_state, int exit_code) +{ + struct task_struct *g, *p; + unsigned long flags; + int nr = -EAGAIN; + + spin_lock_irq(&tsk->sighand->siglock); + if (!signal_group_exit(tsk->signal)) { + mm->core_state = core_state; + nr = zap_process(tsk, exit_code); + } + spin_unlock_irq(&tsk->sighand->siglock); + if (unlikely(nr < 0)) + return nr; + + if (atomic_read(&mm->mm_users) == nr + 1) + goto done; + /* + * We should find and kill all tasks which use this mm, and we should + * count them correctly into ->nr_threads. We don't take tasklist + * lock, but this is safe wrt: + * + * fork: + * None of sub-threads can fork after zap_process(leader). All + * processes which were created before this point should be + * visible to zap_threads() because copy_process() adds the new + * process to the tail of init_task.tasks list, and lock/unlock + * of ->siglock provides a memory barrier. + * + * do_exit: + * The caller holds mm->mmap_sem. This means that the task which + * uses this mm can't pass exit_mm(), so it can't exit or clear + * its ->mm. + * + * de_thread: + * It does list_replace_rcu(&leader->tasks, ¤t->tasks), + * we must see either old or new leader, this does not matter. + * However, it can change p->sighand, so lock_task_sighand(p) + * must be used. Since p->mm != NULL and we hold ->mmap_sem + * it can't fail. + * + * Note also that "g" can be the old leader with ->mm == NULL + * and already unhashed and thus removed from ->thread_group. + * This is OK, __unhash_process()->list_del_rcu() does not + * clear the ->next pointer, we will find the new leader via + * next_thread(). + */ + rcu_read_lock(); + for_each_process(g) { + if (g == tsk->group_leader) + continue; + if (g->flags & PF_KTHREAD) + continue; + p = g; + do { + if (p->mm) { + if (unlikely(p->mm == mm)) { + lock_task_sighand(p, &flags); + nr += zap_process(p, exit_code); + unlock_task_sighand(p, &flags); + } + break; + } + } while_each_thread(g, p); + } + rcu_read_unlock(); +done: + atomic_set(&core_state->nr_threads, nr); + return nr; +} + +static int coredump_wait(int exit_code, struct core_state *core_state) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + int core_waiters = -EBUSY; + + init_completion(&core_state->startup); + core_state->dumper.task = tsk; + core_state->dumper.next = NULL; + + down_write(&mm->mmap_sem); + if (!mm->core_state) + core_waiters = zap_threads(tsk, mm, core_state, exit_code); + up_write(&mm->mmap_sem); + + if (core_waiters > 0) { + struct core_thread *ptr; + + wait_for_completion(&core_state->startup); + /* + * Wait for all the threads to become inactive, so that + * all the thread context (extended register state, like + * fpu etc) gets copied to the memory. + */ + ptr = core_state->dumper.next; + while (ptr != NULL) { + wait_task_inactive(ptr->task, 0); + ptr = ptr->next; + } + } + + return core_waiters; +} + +static void coredump_finish(struct mm_struct *mm) +{ + struct core_thread *curr, *next; + struct task_struct *task; + + next = mm->core_state->dumper.next; + while ((curr = next) != NULL) { + next = curr->next; + task = curr->task; + /* + * see exit_mm(), curr->task must not see + * ->task == NULL before we read ->next. + */ + smp_mb(); + curr->task = NULL; + wake_up_process(task); + } + + mm->core_state = NULL; +} + +static void wait_for_dump_helpers(struct file *file) +{ + struct pipe_inode_info *pipe; + + pipe = file->f_path.dentry->d_inode->i_pipe; + + pipe_lock(pipe); + pipe->readers++; + pipe->writers--; + + while ((pipe->readers > 1) && (!signal_pending(current))) { + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + pipe_wait(pipe); + } + + pipe->readers--; + pipe->writers++; + pipe_unlock(pipe); + +} + +/* + * umh_pipe_setup + * helper function to customize the process used + * to collect the core in userspace. Specifically + * it sets up a pipe and installs it as fd 0 (stdin) + * for the process. Returns 0 on success, or + * PTR_ERR on failure. + * Note that it also sets the core limit to 1. This + * is a special value that we use to trap recursive + * core dumps + */ +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +{ + struct file *files[2]; + struct coredump_params *cp = (struct coredump_params *)info->data; + int err = create_pipe_files(files, 0); + if (err) + return err; + + cp->file = files[1]; + + replace_fd(0, files[0], 0); + /* and disallow core files too */ + current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; + + return 0; +} + +void do_coredump(long signr, int exit_code, struct pt_regs *regs) +{ + struct core_state core_state; + struct core_name cn; + struct mm_struct *mm = current->mm; + struct linux_binfmt * binfmt; + const struct cred *old_cred; + struct cred *cred; + int retval = 0; + int flag = 0; + int ispipe; + struct files_struct *displaced; + bool need_nonrelative = false; + static atomic_t core_dump_count = ATOMIC_INIT(0); + struct coredump_params cprm = { + .signr = signr, + .regs = regs, + .limit = rlimit(RLIMIT_CORE), + /* + * We must use the same mm->flags while dumping core to avoid + * inconsistency of bit flags, since this flag is not protected + * by any locks. + */ + .mm_flags = mm->flags, + }; + + audit_core_dumps(signr); + + binfmt = mm->binfmt; + if (!binfmt || !binfmt->core_dump) + goto fail; + if (!__get_dumpable(cprm.mm_flags)) + goto fail; + + cred = prepare_creds(); + if (!cred) + goto fail; + /* + * We cannot trust fsuid as being the "true" uid of the process + * nor do we know its entire history. We only know it was tainted + * so we dump it as root in mode 2, and only into a controlled + * environment (pipe handler or fully qualified path). + */ + if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { + /* Setuid core dump mode */ + flag = O_EXCL; /* Stop rewrite attacks */ + cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + need_nonrelative = true; + } + + retval = coredump_wait(exit_code, &core_state); + if (retval < 0) + goto fail_creds; + + old_cred = override_creds(cred); + + /* + * Clear any false indication of pending signals that might + * be seen by the filesystem code called to write the core file. + */ + clear_thread_flag(TIF_SIGPENDING); + + ispipe = format_corename(&cn, signr); + + if (ispipe) { + int dump_count; + char **helper_argv; + + if (ispipe < 0) { + printk(KERN_WARNING "format_corename failed\n"); + printk(KERN_WARNING "Aborting core\n"); + goto fail_corename; + } + + if (cprm.limit == 1) { + /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + * + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * cprm.limit of 1 here as a speacial value, this is a + * consistent way to catch recursive crashes. + * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + printk(KERN_WARNING + "Process %d(%s) has RLIMIT_CORE set to 1\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Aborting core\n"); + goto fail_unlock; + } + cprm.limit = RLIM_INFINITY; + + dump_count = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < dump_count)) { + printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_dropcount; + } + + helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); + if (!helper_argv) { + printk(KERN_WARNING "%s failed to allocate memory\n", + __func__); + goto fail_dropcount; + } + + retval = call_usermodehelper_fns(helper_argv[0], helper_argv, + NULL, UMH_WAIT_EXEC, umh_pipe_setup, + NULL, &cprm); + argv_free(helper_argv); + if (retval) { + printk(KERN_INFO "Core dump to %s pipe failed\n", + cn.corename); + goto close_fail; + } + } else { + struct inode *inode; + + if (cprm.limit < binfmt->min_coredump) + goto fail_unlock; + + if (need_nonrelative && cn.corename[0] != '/') { + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ + "to fully qualified path!\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_unlock; + } + + cprm.file = filp_open(cn.corename, + O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, + 0600); + if (IS_ERR(cprm.file)) + goto fail_unlock; + + inode = cprm.file->f_path.dentry->d_inode; + if (inode->i_nlink > 1) + goto close_fail; + if (d_unhashed(cprm.file->f_path.dentry)) + goto close_fail; + /* + * AK: actually i see no reason to not allow this for named + * pipes etc, but keep the previous behaviour for now. + */ + if (!S_ISREG(inode->i_mode)) + goto close_fail; + /* + * Dont allow local users get cute and trick others to coredump + * into their pre-created files. + */ + if (!uid_eq(inode->i_uid, current_fsuid())) + goto close_fail; + if (!cprm.file->f_op || !cprm.file->f_op->write) + goto close_fail; + if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) + goto close_fail; + } + + /* get us an unshared descriptor table; almost always a no-op */ + retval = unshare_files(&displaced); + if (retval) + goto close_fail; + if (displaced) + put_files_struct(displaced); + retval = binfmt->core_dump(&cprm); + if (retval) + current->signal->group_exit_code |= 0x80; + + if (ispipe && core_pipe_limit) + wait_for_dump_helpers(cprm.file); +close_fail: + if (cprm.file) + filp_close(cprm.file, NULL); +fail_dropcount: + if (ispipe) + atomic_dec(&core_dump_count); +fail_unlock: + kfree(cn.corename); +fail_corename: + coredump_finish(mm); + revert_creds(old_cred); +fail_creds: + put_cred(cred); +fail: + return; +} + +/* + * Core dumping helper functions. These are the only things you should + * do on a core-file: use only these functions to write out all the + * necessary info. + */ +int dump_write(struct file *file, const void *addr, int nr) +{ + return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} +EXPORT_SYMBOL(dump_write); + +int dump_seek(struct file *file, loff_t off) +{ + int ret = 1; + + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + return 0; + } else { + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) { + ret = 0; + break; + } + off -= n; + } + free_page((unsigned long)buf); + } + return ret; +} +EXPORT_SYMBOL(dump_seek); diff --git a/fs/exec.c b/fs/exec.c index beb05a95e4a3..48fb26ef8a1b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -66,19 +66,8 @@ #include -int core_uses_pid; -char core_pattern[CORENAME_MAX_SIZE] = "core"; -unsigned int core_pipe_limit; int suid_dumpable = 0; -struct core_name { - char *corename; - int used, size; -}; -static atomic_t call_count = ATOMIC_INIT(1); - -/* The maximal length of core_pattern is also specified in sysctl.c */ - static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); @@ -1603,353 +1592,6 @@ void set_binfmt(struct linux_binfmt *new) EXPORT_SYMBOL(set_binfmt); -static int expand_corename(struct core_name *cn) -{ - char *old_corename = cn->corename; - - cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); - cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); - - if (!cn->corename) { - kfree(old_corename); - return -ENOMEM; - } - - return 0; -} - -static int cn_printf(struct core_name *cn, const char *fmt, ...) -{ - char *cur; - int need; - int ret; - va_list arg; - - va_start(arg, fmt); - need = vsnprintf(NULL, 0, fmt, arg); - va_end(arg); - - if (likely(need < cn->size - cn->used - 1)) - goto out_printf; - - ret = expand_corename(cn); - if (ret) - goto expand_fail; - -out_printf: - cur = cn->corename + cn->used; - va_start(arg, fmt); - vsnprintf(cur, need + 1, fmt, arg); - va_end(arg); - cn->used += need; - return 0; - -expand_fail: - return ret; -} - -static void cn_escape(char *str) -{ - for (; *str; str++) - if (*str == '/') - *str = '!'; -} - -static int cn_print_exe_file(struct core_name *cn) -{ - struct file *exe_file; - char *pathbuf, *path; - int ret; - - exe_file = get_mm_exe_file(current->mm); - if (!exe_file) { - char *commstart = cn->corename + cn->used; - ret = cn_printf(cn, "%s (path unknown)", current->comm); - cn_escape(commstart); - return ret; - } - - pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); - if (!pathbuf) { - ret = -ENOMEM; - goto put_exe_file; - } - - path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); - if (IS_ERR(path)) { - ret = PTR_ERR(path); - goto free_buf; - } - - cn_escape(path); - - ret = cn_printf(cn, "%s", path); - -free_buf: - kfree(pathbuf); -put_exe_file: - fput(exe_file); - return ret; -} - -/* format_corename will inspect the pattern parameter, and output a - * name into corename, which must have space for at least - * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. - */ -static int format_corename(struct core_name *cn, long signr) -{ - const struct cred *cred = current_cred(); - const char *pat_ptr = core_pattern; - int ispipe = (*pat_ptr == '|'); - int pid_in_pattern = 0; - int err = 0; - - cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); - cn->corename = kmalloc(cn->size, GFP_KERNEL); - cn->used = 0; - - if (!cn->corename) - return -ENOMEM; - - /* Repeat as long as we have more pattern to process and more output - space */ - while (*pat_ptr) { - if (*pat_ptr != '%') { - if (*pat_ptr == 0) - goto out; - err = cn_printf(cn, "%c", *pat_ptr++); - } else { - switch (*++pat_ptr) { - /* single % at the end, drop that */ - case 0: - goto out; - /* Double percent, output one percent */ - case '%': - err = cn_printf(cn, "%c", '%'); - break; - /* pid */ - case 'p': - pid_in_pattern = 1; - err = cn_printf(cn, "%d", - task_tgid_vnr(current)); - break; - /* uid */ - case 'u': - err = cn_printf(cn, "%d", cred->uid); - break; - /* gid */ - case 'g': - err = cn_printf(cn, "%d", cred->gid); - break; - /* signal that caused the coredump */ - case 's': - err = cn_printf(cn, "%ld", signr); - break; - /* UNIX time of coredump */ - case 't': { - struct timeval tv; - do_gettimeofday(&tv); - err = cn_printf(cn, "%lu", tv.tv_sec); - break; - } - /* hostname */ - case 'h': { - char *namestart = cn->corename + cn->used; - down_read(&uts_sem); - err = cn_printf(cn, "%s", - utsname()->nodename); - up_read(&uts_sem); - cn_escape(namestart); - break; - } - /* executable */ - case 'e': { - char *commstart = cn->corename + cn->used; - err = cn_printf(cn, "%s", current->comm); - cn_escape(commstart); - break; - } - case 'E': - err = cn_print_exe_file(cn); - break; - /* core limit size */ - case 'c': - err = cn_printf(cn, "%lu", - rlimit(RLIMIT_CORE)); - break; - default: - break; - } - ++pat_ptr; - } - - if (err) - return err; - } - - /* Backward compatibility with core_uses_pid: - * - * If core_pattern does not include a %p (as is the default) - * and core_uses_pid is set, then .%pid will be appended to - * the filename. Do not do this for piped commands. */ - if (!ispipe && !pid_in_pattern && core_uses_pid) { - err = cn_printf(cn, ".%d", task_tgid_vnr(current)); - if (err) - return err; - } -out: - return ispipe; -} - -static int zap_process(struct task_struct *start, int exit_code) -{ - struct task_struct *t; - int nr = 0; - - start->signal->flags = SIGNAL_GROUP_EXIT; - start->signal->group_exit_code = exit_code; - start->signal->group_stop_count = 0; - - t = start; - do { - task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); - if (t != current && t->mm) { - sigaddset(&t->pending.signal, SIGKILL); - signal_wake_up(t, 1); - nr++; - } - } while_each_thread(start, t); - - return nr; -} - -static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - struct core_state *core_state, int exit_code) -{ - struct task_struct *g, *p; - unsigned long flags; - int nr = -EAGAIN; - - spin_lock_irq(&tsk->sighand->siglock); - if (!signal_group_exit(tsk->signal)) { - mm->core_state = core_state; - nr = zap_process(tsk, exit_code); - } - spin_unlock_irq(&tsk->sighand->siglock); - if (unlikely(nr < 0)) - return nr; - - if (atomic_read(&mm->mm_users) == nr + 1) - goto done; - /* - * We should find and kill all tasks which use this mm, and we should - * count them correctly into ->nr_threads. We don't take tasklist - * lock, but this is safe wrt: - * - * fork: - * None of sub-threads can fork after zap_process(leader). All - * processes which were created before this point should be - * visible to zap_threads() because copy_process() adds the new - * process to the tail of init_task.tasks list, and lock/unlock - * of ->siglock provides a memory barrier. - * - * do_exit: - * The caller holds mm->mmap_sem. This means that the task which - * uses this mm can't pass exit_mm(), so it can't exit or clear - * its ->mm. - * - * de_thread: - * It does list_replace_rcu(&leader->tasks, ¤t->tasks), - * we must see either old or new leader, this does not matter. - * However, it can change p->sighand, so lock_task_sighand(p) - * must be used. Since p->mm != NULL and we hold ->mmap_sem - * it can't fail. - * - * Note also that "g" can be the old leader with ->mm == NULL - * and already unhashed and thus removed from ->thread_group. - * This is OK, __unhash_process()->list_del_rcu() does not - * clear the ->next pointer, we will find the new leader via - * next_thread(). - */ - rcu_read_lock(); - for_each_process(g) { - if (g == tsk->group_leader) - continue; - if (g->flags & PF_KTHREAD) - continue; - p = g; - do { - if (p->mm) { - if (unlikely(p->mm == mm)) { - lock_task_sighand(p, &flags); - nr += zap_process(p, exit_code); - unlock_task_sighand(p, &flags); - } - break; - } - } while_each_thread(g, p); - } - rcu_read_unlock(); -done: - atomic_set(&core_state->nr_threads, nr); - return nr; -} - -static int coredump_wait(int exit_code, struct core_state *core_state) -{ - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - int core_waiters = -EBUSY; - - init_completion(&core_state->startup); - core_state->dumper.task = tsk; - core_state->dumper.next = NULL; - - down_write(&mm->mmap_sem); - if (!mm->core_state) - core_waiters = zap_threads(tsk, mm, core_state, exit_code); - up_write(&mm->mmap_sem); - - if (core_waiters > 0) { - struct core_thread *ptr; - - wait_for_completion(&core_state->startup); - /* - * Wait for all the threads to become inactive, so that - * all the thread context (extended register state, like - * fpu etc) gets copied to the memory. - */ - ptr = core_state->dumper.next; - while (ptr != NULL) { - wait_task_inactive(ptr->task, 0); - ptr = ptr->next; - } - } - - return core_waiters; -} - -static void coredump_finish(struct mm_struct *mm) -{ - struct core_thread *curr, *next; - struct task_struct *task; - - next = mm->core_state->dumper.next; - while ((curr = next) != NULL) { - next = curr->next; - task = curr->task; - /* - * see exit_mm(), curr->task must not see - * ->task == NULL before we read ->next. - */ - smp_mb(); - curr->task = NULL; - wake_up_process(task); - } - - mm->core_state = NULL; -} - /* * set_dumpable converts traditional three-value dumpable to two flags and * stores them into mm->flags. It modifies lower two bits of mm->flags, but @@ -1991,7 +1633,7 @@ void set_dumpable(struct mm_struct *mm, int value) } } -static int __get_dumpable(unsigned long mm_flags) +int __get_dumpable(unsigned long mm_flags) { int ret; @@ -2003,288 +1645,3 @@ int get_dumpable(struct mm_struct *mm) { return __get_dumpable(mm->flags); } - -static void wait_for_dump_helpers(struct file *file) -{ - struct pipe_inode_info *pipe; - - pipe = file->f_path.dentry->d_inode->i_pipe; - - pipe_lock(pipe); - pipe->readers++; - pipe->writers--; - - while ((pipe->readers > 1) && (!signal_pending(current))) { - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - pipe_wait(pipe); - } - - pipe->readers--; - pipe->writers++; - pipe_unlock(pipe); - -} - - -/* - * umh_pipe_setup - * helper function to customize the process used - * to collect the core in userspace. Specifically - * it sets up a pipe and installs it as fd 0 (stdin) - * for the process. Returns 0 on success, or - * PTR_ERR on failure. - * Note that it also sets the core limit to 1. This - * is a special value that we use to trap recursive - * core dumps - */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) -{ - struct file *files[2]; - struct coredump_params *cp = (struct coredump_params *)info->data; - int err = create_pipe_files(files, 0); - if (err) - return err; - - cp->file = files[1]; - - replace_fd(0, files[0], 0); - /* and disallow core files too */ - current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; - - return 0; -} - -void do_coredump(long signr, int exit_code, struct pt_regs *regs) -{ - struct core_state core_state; - struct core_name cn; - struct mm_struct *mm = current->mm; - struct linux_binfmt * binfmt; - const struct cred *old_cred; - struct cred *cred; - int retval = 0; - int flag = 0; - int ispipe; - struct files_struct *displaced; - bool need_nonrelative = false; - static atomic_t core_dump_count = ATOMIC_INIT(0); - struct coredump_params cprm = { - .signr = signr, - .regs = regs, - .limit = rlimit(RLIMIT_CORE), - /* - * We must use the same mm->flags while dumping core to avoid - * inconsistency of bit flags, since this flag is not protected - * by any locks. - */ - .mm_flags = mm->flags, - }; - - audit_core_dumps(signr); - - binfmt = mm->binfmt; - if (!binfmt || !binfmt->core_dump) - goto fail; - if (!__get_dumpable(cprm.mm_flags)) - goto fail; - - cred = prepare_creds(); - if (!cred) - goto fail; - /* - * We cannot trust fsuid as being the "true" uid of the process - * nor do we know its entire history. We only know it was tainted - * so we dump it as root in mode 2, and only into a controlled - * environment (pipe handler or fully qualified path). - */ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { - /* Setuid core dump mode */ - flag = O_EXCL; /* Stop rewrite attacks */ - cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ - need_nonrelative = true; - } - - retval = coredump_wait(exit_code, &core_state); - if (retval < 0) - goto fail_creds; - - old_cred = override_creds(cred); - - /* - * Clear any false indication of pending signals that might - * be seen by the filesystem code called to write the core file. - */ - clear_thread_flag(TIF_SIGPENDING); - - ispipe = format_corename(&cn, signr); - - if (ispipe) { - int dump_count; - char **helper_argv; - - if (ispipe < 0) { - printk(KERN_WARNING "format_corename failed\n"); - printk(KERN_WARNING "Aborting core\n"); - goto fail_corename; - } - - if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. - * - * Normally core limits are irrelevant to pipes, since - * we're not writing to the file system, but we use - * cprm.limit of 1 here as a speacial value, this is a - * consistent way to catch recursive crashes. - * We can still crash if the core_pattern binary sets - * RLIM_CORE = !1, but it runs as root, and can do - * lots of stupid things. - * - * Note that we use task_tgid_vnr here to grab the pid - * of the process group leader. That way we get the - * right pid if a thread in a multi-threaded - * core_pattern process dies. - */ - printk(KERN_WARNING - "Process %d(%s) has RLIMIT_CORE set to 1\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Aborting core\n"); - goto fail_unlock; - } - cprm.limit = RLIM_INFINITY; - - dump_count = atomic_inc_return(&core_dump_count); - if (core_pipe_limit && (core_pipe_limit < dump_count)) { - printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Skipping core dump\n"); - goto fail_dropcount; - } - - helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); - if (!helper_argv) { - printk(KERN_WARNING "%s failed to allocate memory\n", - __func__); - goto fail_dropcount; - } - - retval = call_usermodehelper_fns(helper_argv[0], helper_argv, - NULL, UMH_WAIT_EXEC, umh_pipe_setup, - NULL, &cprm); - argv_free(helper_argv); - if (retval) { - printk(KERN_INFO "Core dump to %s pipe failed\n", - cn.corename); - goto close_fail; - } - } else { - struct inode *inode; - - if (cprm.limit < binfmt->min_coredump) - goto fail_unlock; - - if (need_nonrelative && cn.corename[0] != '/') { - printk(KERN_WARNING "Pid %d(%s) can only dump core "\ - "to fully qualified path!\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Skipping core dump\n"); - goto fail_unlock; - } - - cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, - 0600); - if (IS_ERR(cprm.file)) - goto fail_unlock; - - inode = cprm.file->f_path.dentry->d_inode; - if (inode->i_nlink > 1) - goto close_fail; - if (d_unhashed(cprm.file->f_path.dentry)) - goto close_fail; - /* - * AK: actually i see no reason to not allow this for named - * pipes etc, but keep the previous behaviour for now. - */ - if (!S_ISREG(inode->i_mode)) - goto close_fail; - /* - * Dont allow local users get cute and trick others to coredump - * into their pre-created files. - */ - if (!uid_eq(inode->i_uid, current_fsuid())) - goto close_fail; - if (!cprm.file->f_op || !cprm.file->f_op->write) - goto close_fail; - if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) - goto close_fail; - } - - /* get us an unshared descriptor table; almost always a no-op */ - retval = unshare_files(&displaced); - if (retval) - goto close_fail; - if (displaced) - put_files_struct(displaced); - retval = binfmt->core_dump(&cprm); - if (retval) - current->signal->group_exit_code |= 0x80; - - if (ispipe && core_pipe_limit) - wait_for_dump_helpers(cprm.file); -close_fail: - if (cprm.file) - filp_close(cprm.file, NULL); -fail_dropcount: - if (ispipe) - atomic_dec(&core_dump_count); -fail_unlock: - kfree(cn.corename); -fail_corename: - coredump_finish(mm); - revert_creds(old_cred); -fail_creds: - put_cred(cred); -fail: - return; -} - -/* - * Core dumping helper functions. These are the only things you should - * do on a core-file: use only these functions to write out all the - * necessary info. - */ -int dump_write(struct file *file, const void *addr, int nr) -{ - return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} -EXPORT_SYMBOL(dump_write); - -int dump_seek(struct file *file, loff_t off) -{ - int ret = 1; - - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) { - ret = 0; - break; - } - off -= n; - } - free_page((unsigned long)buf); - } - return ret; -} -EXPORT_SYMBOL(dump_seek); diff --git a/include/linux/sched.h b/include/linux/sched.h index 23bddac4bad8..78041f4c7584 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -405,6 +405,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); +extern int __get_dumpable(unsigned long mm_flags); /* get/set_dumpable() values */ #define SUID_DUMPABLE_DISABLED 0 -- cgit v1.2.3 From 99621b44aa194eab594e1f17217231c02b519211 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 29 Aug 2012 16:31:33 -0400 Subject: btrfs: reada_extent doesn't need kref for refcount All increments and decrements are under the same spinlock - have to be, since they need to protect the radix_tree it's found in. Just use int, no need to wank with kref... Signed-off-by: Al Viro --- fs/btrfs/reada.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 48a4882d8ad5..a955669519a2 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -68,7 +68,7 @@ struct reada_extent { u32 blocksize; int err; struct list_head extctl; - struct kref refcnt; + int refcnt; spinlock_t lock; struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; @@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) @@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); if (re) @@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, re->top = *top; INIT_LIST_HEAD(&re->extctl); spin_lock_init(&re->lock); - kref_init(&re->refcnt); + re->refcnt = 1; /* * map block @@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, if (ret == -EEXIST) { re_exist = radix_tree_lookup(&fs_info->reada_tree, index); BUG_ON(!re_exist); - kref_get(&re_exist->refcnt); + re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); goto error; } @@ -465,10 +465,6 @@ error: return re_exist; } -static void reada_kref_dummy(struct kref *kr) -{ -} - static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { @@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, unsigned long index = re->logical >> PAGE_CACHE_SHIFT; spin_lock(&fs_info->reada_lock); - if (!kref_put(&re->refcnt, reada_kref_dummy)) { + if (--re->refcnt) { spin_unlock(&fs_info->reada_lock); return; } @@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, return 0; } dev->reada_next = re->logical + re->blocksize; - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); -- cgit v1.2.3 From 8c0a85377048b64c880e76ec7368904fe46d0b94 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 26 Sep 2012 11:33:07 +1000 Subject: fs: push rcu_barrier() from deactivate_locked_super() to filesystems There's no reason to call rcu_barrier() on every deactivate_locked_super(). We only need to make sure that all delayed rcu free inodes are flushed before we destroy related cache. Removing rcu_barrier() from deactivate_locked_super() affects some fast paths. E.g. on my machine exit_group() of a last process in IPC namespace takes 0.07538s. rcu_barrier() takes 0.05188s of that time. Signed-off-by: Kirill A. Shutemov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/9p/v9fs.c | 5 +++++ fs/adfs/super.c | 5 +++++ fs/affs/super.c | 5 +++++ fs/afs/super.c | 5 +++++ fs/befs/linuxvfs.c | 5 +++++ fs/bfs/inode.c | 5 +++++ fs/btrfs/extent_io.c | 6 ++++++ fs/btrfs/inode.c | 5 +++++ fs/ceph/super.c | 5 +++++ fs/cifs/cifsfs.c | 5 +++++ fs/coda/inode.c | 5 +++++ fs/ecryptfs/main.c | 6 ++++++ fs/efs/super.c | 5 +++++ fs/exofs/super.c | 5 +++++ fs/ext2/super.c | 5 +++++ fs/ext3/super.c | 5 +++++ fs/ext4/super.c | 5 +++++ fs/fat/inode.c | 5 +++++ fs/freevxfs/vxfs_super.c | 5 +++++ fs/fuse/inode.c | 6 ++++++ fs/hfs/super.c | 6 ++++++ fs/hfsplus/super.c | 6 ++++++ fs/hpfs/super.c | 5 +++++ fs/hugetlbfs/inode.c | 5 +++++ fs/isofs/inode.c | 5 +++++ fs/jffs2/super.c | 6 ++++++ fs/jfs/super.c | 6 ++++++ fs/logfs/inode.c | 5 +++++ fs/minix/inode.c | 5 +++++ fs/ncpfs/inode.c | 5 +++++ fs/nfs/inode.c | 5 +++++ fs/nilfs2/super.c | 6 ++++++ fs/ntfs/super.c | 6 ++++++ fs/ocfs2/dlmfs/dlmfs.c | 5 +++++ fs/ocfs2/super.c | 5 +++++ fs/openpromfs/inode.c | 5 +++++ fs/qnx4/inode.c | 5 +++++ fs/qnx6/inode.c | 5 +++++ fs/reiserfs/super.c | 5 +++++ fs/romfs/super.c | 5 +++++ fs/squashfs/super.c | 5 +++++ fs/super.c | 6 ------ fs/sysv/inode.c | 5 +++++ fs/ubifs/super.c | 6 ++++++ fs/udf/super.c | 5 +++++ fs/ufs/super.c | 5 +++++ fs/xfs/xfs_super.c | 5 +++++ 47 files changed, 240 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index b85efa773949..392c5dac1981 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void) */ static void v9fs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(v9fs_inode_cache); } diff --git a/fs/adfs/super.c b/fs/adfs/super.c index bdaec92353c2..c830c857c663 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -275,6 +275,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(adfs_inode_cachep); } diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..2f57053bf26c 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -147,6 +147,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(affs_inode_cachep); } diff --git a/fs/afs/super.c b/fs/afs/super.c index df8c6047c2a1..43165009428d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -123,6 +123,11 @@ void __exit afs_fs_exit(void) BUG(); } + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(afs_inode_cachep); _leave(""); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cf7f3c67c8b7..962b4f8f7994 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -454,6 +454,11 @@ befs_init_inodecache(void) static void befs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(befs_inode_cachep); } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9870417c26e7..d5fc598d6e4a 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -280,6 +280,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(bfs_inode_cachep); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c878476bb91..b08ea4717e9d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -107,6 +107,12 @@ void extent_io_exit(void) list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } + + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); if (extent_state_cache) kmem_cache_destroy(extent_state_cache); if (extent_buffer_cache) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ec154f954646..cf03a91d806f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7076,6 +7076,11 @@ static void init_once(void *foo) void btrfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (btrfs_inode_cachep) kmem_cache_destroy(btrfs_inode_cachep); if (btrfs_trans_handle_cachep) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..3a42d9326378 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -603,6 +603,11 @@ bad_cap: static void destroy_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); kmem_cache_destroy(ceph_dentry_cachep); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index db8a404a51dd..d4ce77a02327 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -977,6 +977,11 @@ cifs_init_inodecache(void) static void cifs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(cifs_inode_cachep); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d315c6c5891a..be2aa4909487 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -85,6 +85,11 @@ int coda_init_inodecache(void) void coda_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(coda_inode_cachep); } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9b627c15010a..34fcde765d24 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -710,6 +710,12 @@ static void ecryptfs_free_kmem_caches(void) { int i; + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { struct ecryptfs_cache_info *info; diff --git a/fs/efs/super.c b/fs/efs/super.c index e755ec746c69..2002431ef9a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -96,6 +96,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(efs_inode_cachep); } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index dde41a75c7c8..59e3bbfac0b1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) */ static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(exofs_inode_cachep); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index af74d9e27b71..6c205d0c565b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext2_inode_cachep); } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c892e93d8e7..8d41c8889eee 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -532,6 +532,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext3_inode_cachep); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c6e0cb3d1f4a..455b7d8c6d62 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1019,6 +1019,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext4_inode_cachep); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 05e897fe9866..fd8e47cd898b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void) static void __exit fat_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fat_inode_cachep); } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index d4fabd26084e..fed2c8afb3a9 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -279,6 +279,11 @@ static void __exit vxfs_cleanup(void) { unregister_filesystem(&vxfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(vxfs_inode_cachep); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fca222dabe3c..f0eda124cffb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void) { unregister_filesystem(&fuse_fs_type); unregister_fuseblk(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fuse_inode_cachep); } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4eb873e0c07b..941d7a8c2197 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -482,6 +482,12 @@ static int __init init_hfs_fs(void) static void __exit exit_hfs_fs(void) { unregister_filesystem(&hfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfs_inode_cachep); } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index fdafb2d71654..811a84d2d964 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void) static void __exit exit_hfsplus_fs(void) { unregister_filesystem(&hfsplus_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfsplus_inode_cachep); } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 706a12c083ea..3cb1da56eb73 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -210,6 +210,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hpfs_inode_cachep); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8349a899912e..c4b85d064e6b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1042,6 +1042,11 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); kern_unmount(hugetlbfs_vfsmount); unregister_filesystem(&hugetlbfs_fs_type); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 29037c365ba4..f94cde4527e8 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -114,6 +114,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(isofs_inode_cachep); } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 61ea41389f90..ff487954cd96 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void) unregister_filesystem(&jffs2_fs_type); jffs2_destroy_slab_caches(); jffs2_compressors_exit(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jffs2_inode_cachep); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..3735347fd5f6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -903,6 +903,12 @@ static void __exit exit_jfs_fs(void) jfs_proc_clean(); #endif unregister_filesystem(&jfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jfs_inode_cachep); } diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 6984562738d3..121bba2cf6f2 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -417,5 +417,10 @@ int logfs_init_inode_cache(void) void logfs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(logfs_inode_cache); } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2a503ad020d5..dc8d3629c20a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -100,6 +100,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(minix_inode_cachep); } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 333df07ae3bd..0c62c55b25d7 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -89,6 +89,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ncp_inode_cachep); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b47610338f5..e4c716d374a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void) static void nfs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(nfs_inode_cachep); } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6a10812711c1..3c991dc84f2f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj) static void nilfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + if (nilfs_inode_cachep) kmem_cache_destroy(nilfs_inode_cachep); if (nilfs_transaction_cachep) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 2bc149d6a784..fe08d4afa106 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3168,6 +3168,12 @@ static void __exit exit_ntfs_fs(void) ntfs_debug("Unregistering NTFS driver."); unregister_filesystem(&ntfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ntfs_big_inode_cache); kmem_cache_destroy(ntfs_inode_cache); kmem_cache_destroy(ntfs_name_cache); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 83b6f98e0665..16b712d260d4 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void) flush_workqueue(user_dlm_worker); destroy_workqueue(user_dlm_worker); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(dlmfs_inode_cache); bdi_destroy(&dlmfs_backing_dev_info); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 68f4541c2db9..0e91ec22a940 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void) static void ocfs2_free_mem_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (ocfs2_inode_cachep) kmem_cache_destroy(ocfs2_inode_cachep); ocfs2_inode_cachep = NULL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a3477949bca..2ad080faca34 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -463,6 +463,11 @@ static int __init init_openprom_fs(void) static void __exit exit_openprom_fs(void) { unregister_filesystem(&openprom_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(op_inode_cachep); } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 552e994e3aa1..9534b4f76579 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -391,6 +391,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx4_inode_cachep); } diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 2049c814bda4..1b37fff7b5ff 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -651,6 +651,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx6_inode_cachep); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7a37dabf5a96..1078ae179993 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -608,6 +608,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(reiserfs_inode_cachep); } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 77c5f2173983..fd7c5f60b46b 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -648,6 +648,11 @@ error_register: static void __exit exit_romfs_fs(void) { unregister_filesystem(&romfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(romfs_inode_cachep); } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 29cd014ed3a1..260e3928d4f5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -425,6 +425,11 @@ static int __init init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(squashfs_inode_cachep); } diff --git a/fs/super.c b/fs/super.c index 0902cfa6a12e..5fdf7ff32c4e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); - - /* - * We need to call rcu_barrier so all the delayed rcu free - * inodes are flushed before we release the fs module. - */ - rcu_barrier(); put_filesystem(fs); put_super(s); } else { diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 80e1e2b18df1..0d0c50bd3321 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -360,5 +360,10 @@ int __init sysv_init_icache(void) void sysv_destroy_icache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(sysv_inode_cachep); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 71a197f0f93d..36e09ca9130b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void) dbg_debugfs_exit(); ubifs_compressors_exit(); unregister_shrinker(&ubifs_shrinker_info); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ubifs_inode_slab); unregister_filesystem(&ubifs_fs_type); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 18fc038a438d..b8d27642ab06 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -171,6 +171,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(udf_inode_cachep); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 444927e5706b..f7cfecfe1cab 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1466,6 +1466,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ufs_inode_cachep); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19e2380fb867..83d36e473d2f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1506,6 +1506,11 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); -- cgit v1.2.3 From 8f9c0119d7ba94c3ad13876acc240d7f12b6d8e1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 19 Sep 2012 12:01:52 +0100 Subject: compat: fs: Generic compat_sys_sendfile implementation This function is used by sparc, powerpc and arm64 for compat support. The patch adds a generic implementation which calls do_sendfile() directly and avoids set_fs(). The sparc architecture has wrappers for the sign extensions while powerpc relies on the compiler to do the this. The patch adds wrappers for powerpc to handle the u32->int type conversion. compat_sys_sendfile64() can be replaced by a sys_sendfile() call since compat_loff_t has the same size as off_t on a 64-bit system. On powerpc, the patch also changes the 64-bit sendfile call from sys_sendile64 to sys_sendfile. Signed-off-by: Catalin Marinas Acked-by: David S. Miller Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Alexander Viro Cc: Andrew Morton Signed-off-by: Al Viro --- arch/powerpc/include/asm/systbl.h | 4 ++-- arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/sys_ppc32.c | 45 ++++++-------------------------------- arch/sparc/include/asm/unistd.h | 1 + arch/sparc/kernel/sys32.S | 2 +- arch/sparc/kernel/sys_sparc32.c | 46 --------------------------------------- fs/compat.c | 22 +++++++++++++++++++ fs/read_write.c | 4 ++-- fs/read_write.h | 2 ++ include/linux/compat.h | 3 +++ 10 files changed, 41 insertions(+), 89 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 559ae1ee6706..840838769853 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -189,7 +189,7 @@ SYSCALL_SPU(getcwd) SYSCALL_SPU(capget) SYSCALL_SPU(capset) COMPAT_SYS(sigaltstack) -SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) +SYSX_SPU(sys_sendfile,compat_sys_sendfile_wrapper,sys_sendfile) SYSCALL(ni_syscall) SYSCALL(ni_syscall) PPC_SYS(vfork) @@ -229,7 +229,7 @@ COMPAT_SYS_SPU(sched_setaffinity) COMPAT_SYS_SPU(sched_getaffinity) SYSCALL(ni_syscall) SYSCALL(ni_syscall) -SYS32ONLY(sendfile64) +SYSX(sys_ni_syscall,compat_sys_sendfile64_wrapper,sys_sendfile64) COMPAT_SYS_SPU(io_setup) SYSCALL_SPU(io_destroy) COMPAT_SYS_SPU(io_getevents) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index bd377a368611..c683fa350add 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -419,6 +419,7 @@ #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_NEWFSTATAT +#define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif /* diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 81c570633ead..abd1112da54f 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -143,48 +143,17 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) * and the register representation of a signed int (msr in 64-bit mode) is performed. */ -asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count) +asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd, + compat_off_t __user *offset, u32 count) { - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - off_t __user *up; - - if (offset && get_user(of, offset)) - return -EFAULT; - - /* The __user pointer cast is valid because of the set_fs() */ - set_fs(KERNEL_DS); - up = offset ? (off_t __user *) &of : NULL; - ret = sys_sendfile((int)out_fd, (int)in_fd, up, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; + return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count); } -asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count) +asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, + compat_loff_t __user *offset, u32 count) { - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - loff_t __user *up; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - /* The __user pointer cast is valid because of the set_fs() */ - set_fs(KERNEL_DS); - up = offset ? (loff_t __user *) &lof : NULL; - ret = sys_sendfile64(out_fd, in_fd, up, count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; + return sys_sendfile((int)out_fd, (int)in_fd, + (off_t __user *)offset, count); } long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index fb2693464807..d9a677c51926 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -447,6 +447,7 @@ #else #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND +#define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif /* diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index d97f3eb72e06..44025f4ba41f 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -90,7 +90,7 @@ SIGN1(sys32_mkdir, sys_mkdir, %o1) SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) -SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1) +SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1) SIGN1(sys32_prctl, sys_prctl, %o0) SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2) diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f7392336961f..d862499eb01c 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -506,52 +506,6 @@ long compat_sys_fadvise64_64(int fd, advice); } -asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, - compat_off_t __user *offset, - compat_size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, - offset ? (off_t __user *) &of : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; -} - -asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, - compat_size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, - offset ? (loff_t __user *) &lof : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; -} - /* This is just a version for 32-bit applications which does * not force O_LARGEFILE on. */ diff --git a/fs/compat.c b/fs/compat.c index d72d51e19025..b7a24d0ca30d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1792,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd, return do_handle_open(mountdirfd, handle, flags); } #endif + +#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE +asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, compat_size_t count) +{ + loff_t pos; + off_t off; + ssize_t ret; + + if (offset) { + if (unlikely(get_user(off, offset))) + return -EFAULT; + pos = off; + ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); + if (unlikely(put_user(pos, offset))) + return -EFAULT; + return ret; + } + + return do_sendfile(out_fd, in_fd, NULL, count, 0); +} +#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */ diff --git a/fs/read_write.c b/fs/read_write.c index 28b38279a219..d06534857e9e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -862,8 +862,8 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, return ret; } -static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, - size_t count, loff_t max) +ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, + loff_t max) { struct fd in, out; struct inode *in_inode, *out_inode; diff --git a/fs/read_write.h b/fs/read_write.h index d07b954c6e0c..d3e00ef67420 100644 --- a/fs/read_write.h +++ b/fs/read_write.h @@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, unsigned long nr_segs, loff_t *ppos, io_fn_t fn); +ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, + loff_t max); diff --git a/include/linux/compat.h b/include/linux/compat.h index 09b28b7369d7..fd4e29956d1c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -590,6 +590,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, unsigned long liovcnt, const struct compat_iovec __user *rvec, unsigned long riovcnt, unsigned long flags); +asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, compat_size_t count); + #else #define is_compat_task() (0) -- cgit v1.2.3 From 457712a0bc5389b75d2c93840a684fd77df2aabb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:04:57 -0700 Subject: ceph: return EIO on invalid layout on GET_DATALOC ioctl If the user calls GET_DATALOC on a file with an invalid (e.g., zeroed) layout, return EIO to userland. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 1396ceb46797..36549a46e311 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -187,14 +187,18 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) u64 tmp; struct ceph_object_layout ol; struct ceph_pg pgid; + int r; /* copy and validate */ if (copy_from_user(&dl, arg, sizeof(dl))) return -EFAULT; down_read(&osdc->map_sem); - ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, - &dl.object_no, &dl.object_offset, &olen); + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + &dl.object_no, &dl.object_offset, + &olen); + if (r < 0) + return -EIO; dl.file_offset -= dl.object_offset; dl.object_size = ceph_file_layout_object_size(ci->i_layout); dl.block_size = ceph_file_layout_su(ci->i_layout); -- cgit v1.2.3 From 6285bc231277419255f3498d3eb5ddc9f8e7fe79 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 2 Oct 2012 10:25:51 -0500 Subject: ceph: avoid 32-bit page index overflow A pgoff_t is defined (by default) to have type (unsigned long). On architectures such as i686 that's a 32-bit type. The ceph address space code was attempting to produce 64 bit offsets by shifting a page's index by PAGE_CACHE_SHIFT, but the result was not what was desired because the shift occurred before the result got promoted to 64 bits. Fix this by converting all uses of page->index used in this way to use the page_offset() macro, which ensures the 64-bit result has the intended value. This fixes http://tracker.newdream.net/issues/3112 Reported-by: Mohamed Pakkeer Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/addr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4469b63c9b7b..22b6e4583faa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -205,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, - page->index << PAGE_CACHE_SHIFT, &len, + (u64) page_offset(page), &len, ci->i_truncate_seq, ci->i_truncate_size, &page, 1, 0); if (err == -ENOENT) @@ -286,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) int nr_pages = 0; int ret; - off = page->index << PAGE_CACHE_SHIFT; + off = (u64) page_offset(page); /* count pages */ next_index = page->index; @@ -426,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_inode_info *ci; struct ceph_fs_client *fsc; struct ceph_osd_client *osdc; - loff_t page_off = page->index << PAGE_CACHE_SHIFT; + loff_t page_off = page_offset(page); int len = PAGE_CACHE_SIZE; loff_t i_size; int err = 0; @@ -817,8 +817,7 @@ get_more_pages: /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = (unsigned long long)page->index - << PAGE_CACHE_SHIFT; + offset = (u64) page_offset(page); len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, @@ -1180,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = vma->vm_file->f_dentry->d_inode; struct page *page = vmf->page; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - loff_t off = page->index << PAGE_CACHE_SHIFT; + loff_t off = page_offset(page); loff_t size, len; int ret; -- cgit v1.2.3 From 74b217d0d3a46132fb61adab91d53c57e0d8f68a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Oct 2012 11:28:45 +0300 Subject: ore: signedness bug in _sp2d_min_pg() This for loop doesn't work correctly when "p" is unsigned. Signed-off-by: Dan Carpenter --- fs/exofs/ore_raid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 5f376d14fdcc..b963f38ac298 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -203,7 +203,7 @@ static unsigned _sp2d_min_pg(struct __stripe_pages_2d *sp2d) static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d) { - unsigned p; + int p; for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; -- cgit v1.2.3 From c278531d39f3158bfee93dc67da0b77e09776de2 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Fri, 5 Oct 2012 11:31:55 -0400 Subject: ext4: fix ext4_flush_completed_IO wait semantics BUG #1) All places where we call ext4_flush_completed_IO are broken because buffered io and DIO/AIO goes through three stages 1) submitted io, 2) completed io (in i_completed_io_list) conversion pended 3) finished io (conversion done) And by calling ext4_flush_completed_IO we will flush only requests which were in (2) stage, which is wrong because: 1) punch_hole and truncate _must_ wait for all outstanding unwritten io regardless to it's state. 2) fsync and nolock_dio_read should also wait because there is a time window between end_page_writeback() and ext4_add_complete_io() As result integrity fsync is broken in case of buffered write to fallocated region: fsync blkdev_completion ->filemap_write_and_wait_range ->ext4_end_bio ->end_page_writeback <-- filemap_write_and_wait_range return ->ext4_flush_completed_IO sees empty i_completed_io_list but pended conversion still exist ->ext4_add_complete_io BUG #2) Race window becomes wider due to the 'ext4: completed_io locking cleanup V4' patch series This patch make following changes: 1) ext4_flush_completed_io() now first try to flush completed io and when wait for any outstanding unwritten io via ext4_unwritten_wait() 2) Rename function to more appropriate name. 3) Assert that all callers of ext4_flush_unwritten_io should hold i_mutex to prevent endless wait Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/ext4.h | 3 ++- fs/ext4/extents.c | 6 +++--- fs/ext4/file.c | 2 +- fs/ext4/fsync.c | 2 +- fs/ext4/indirect.c | 8 +++++--- fs/ext4/page-io.c | 11 +++++++---- 6 files changed, 19 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1be2b4472a83..3ab2539b7b2e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1947,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ extern int ext4_sync_file(struct file *, loff_t, loff_t, int); -extern int ext4_flush_completed_IO(struct inode *); +extern int ext4_flush_unwritten_io(struct inode *); /* hash.c */ extern int ext4fs_dirhash(const char *name, int len, struct @@ -2371,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations; extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); +extern void ext4_unwritten_wait(struct inode *inode); /* namei.c */ extern const struct inode_operations ext4_dir_inode_operations; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c1fcf489e056..1c94cca35ed1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4268,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode) * finish any pending end_io work so we won't run the risk of * converting any truncated blocks to initialized later */ - ext4_flush_completed_IO(inode); + ext4_flush_unwritten_io(inode); /* * probably first extent we're gonna free will be last in block @@ -4847,10 +4847,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) /* Wait all existing dio workers, newcomers will block on i_mutex */ ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - err = ext4_flush_completed_IO(inode); + err = ext4_flush_unwritten_io(inode); if (err) goto out_dio; + inode_dio_wait(inode); credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, credits); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 39335bda404b..ca6f07afe601 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -55,7 +55,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) return 0; } -static void ext4_unwritten_wait(struct inode *inode) +void ext4_unwritten_wait(struct inode *inode) { wait_queue_head_t *wq = ext4_ioend_wq(inode); diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 460000868b8e..be1d89f385b4 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -138,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (inode->i_sb->s_flags & MS_RDONLY) goto out; - ret = ext4_flush_completed_IO(inode); + ret = ext4_flush_unwritten_io(inode); if (ret < 0) goto out; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 8d849dae8428..792e388e7b44 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -807,9 +807,11 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, retry: if (rw == READ && ext4_should_dioread_nolock(inode)) { - if (unlikely(!list_empty(&ei->i_completed_io_list))) - ext4_flush_completed_IO(inode); - + if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { + mutex_lock(&inode->i_mutex); + ext4_flush_unwritten_io(inode); + mutex_unlock(&inode->i_mutex); + } /* * Nolock dioread optimization may be dynamically disabled * via ext4_inode_block_unlocked_dio(). Check inode's state diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5b24c407701b..68e896e12a67 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -189,8 +189,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode, list_add_tail(&io->list, &complete); } - /* It is important to update all flags for all end_io in one shot w/o - * dropping the lock.*/ spin_lock_irqsave(&ei->i_completed_io_lock, flags); while (!list_empty(&complete)) { io = list_entry(complete.next, ext4_io_end_t, list); @@ -228,9 +226,14 @@ static void ext4_end_io_work(struct work_struct *work) ext4_do_flush_completed_IO(io->inode, io); } -int ext4_flush_completed_IO(struct inode *inode) +int ext4_flush_unwritten_io(struct inode *inode) { - return ext4_do_flush_completed_IO(inode, NULL); + int ret; + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && + !(inode->i_state & I_FREEING)); + ret = ext4_do_flush_completed_IO(inode, NULL); + ext4_unwritten_wait(inode); + return ret; } ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) -- cgit v1.2.3 From 125c4c706b680c7831f0966ff873c1ad0354ec25 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 4 Oct 2012 17:13:15 -0700 Subject: idr: rename MAX_LEVEL to MAX_IDR_LEVEL To avoid name conflicts: drivers/video/riva/fbdev.c:281:9: sparse: preprocessor token MAX_LEVEL redefined While at it, also make the other names more consistent and add parentheses. [akpm@linux-foundation.org: repair fallout] [sfr@canb.auug.org.au: IB/mlx4: fix for MAX_ID_MASK to MAX_IDR_MASK name change] Signed-off-by: Fengguang Wu Cc: Bernd Petrovitsch Cc: walter harms Cc: Glauber Costa Signed-off-by: Stephen Rothwell Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/i2c/i2c-core.c | 2 +- drivers/infiniband/core/cm.c | 2 +- drivers/infiniband/hw/mlx4/cm.c | 2 +- drivers/pps/pps.c | 2 +- drivers/thermal/thermal_sys.c | 2 +- fs/super.c | 2 +- include/linux/idr.h | 10 +++++----- lib/idr.c | 32 ++++++++++++++++---------------- 8 files changed, 27 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 2091ae8f539a..2421d95130d4 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -982,7 +982,7 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap) if (adap->nr == -1) /* -1 means dynamically assign bus id */ return i2c_add_adapter(adap); - if (adap->nr & ~MAX_ID_MASK) + if (adap->nr & ~MAX_IDR_MASK) return -EINVAL; retry: diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index d67999f6e34a..394fea2ba1bc 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -390,7 +390,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id, &id); if (!ret) - next_id = ((unsigned) id + 1) & MAX_ID_MASK; + next_id = ((unsigned) id + 1) & MAX_IDR_MASK; spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index e25e4dafb8a8..80079e5a2e30 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -225,7 +225,7 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) ret = idr_get_new_above(&sriov->pv_id_table, ent, next_id, &id); if (!ret) { - next_id = ((unsigned) id + 1) & MAX_ID_MASK; + next_id = ((unsigned) id + 1) & MAX_IDR_MASK; ent->pv_cm_id = (u32)id; sl_id_map_add(ibdev, ent); } diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c index e771487132f7..2420d5af0583 100644 --- a/drivers/pps/pps.c +++ b/drivers/pps/pps.c @@ -306,7 +306,7 @@ int pps_register_cdev(struct pps_device *pps) if (err < 0) return err; - pps->id &= MAX_ID_MASK; + pps->id &= MAX_IDR_MASK; if (pps->id >= PPS_MAX_SOURCES) { pr_err("%s: too many PPS sources in the system\n", pps->info.name); diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 67789b8345d2..efd81bb25e01 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -78,7 +78,7 @@ again: else if (unlikely(err)) return err; - *id = *id & MAX_ID_MASK; + *id = *id & MAX_IDR_MASK; return 0; } diff --git a/fs/super.c b/fs/super.c index 5fdf7ff32c4e..a3bc935069d9 100644 --- a/fs/super.c +++ b/fs/super.c @@ -865,7 +865,7 @@ int get_anon_bdev(dev_t *p) else if (error) return -EAGAIN; - if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { + if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); if (unnamed_dev_start > dev) diff --git a/include/linux/idr.h b/include/linux/idr.h index 255491cf522e..87259a44c251 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -38,15 +38,15 @@ #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) -#define MAX_ID_SHIFT (sizeof(int)*8 - 1) -#define MAX_ID_BIT (1U << MAX_ID_SHIFT) -#define MAX_ID_MASK (MAX_ID_BIT - 1) +#define MAX_IDR_SHIFT (sizeof(int)*8 - 1) +#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) +#define MAX_IDR_MASK (MAX_IDR_BIT - 1) /* Leave the possibility of an incomplete final layer */ -#define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS +#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) /* Number of id_layer structs to leave in free list */ -#define IDR_FREE_MAX MAX_LEVEL + MAX_LEVEL +#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ diff --git a/lib/idr.c b/lib/idr.c index 4046e29c0a99..648239079dd2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -20,7 +20,7 @@ * that id to this code and it returns your pointer. * You can release ids at any time. When all ids are released, most of - * the memory is returned (we keep IDR_FREE_MAX) in a local pool so we + * the memory is returned (we keep MAX_IDR_FREE) in a local pool so we * don't need to go to the memory "store" during an id allocate, just * so you don't need to be too concerned about locking and conflicts * with the slab allocator. @@ -122,7 +122,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) */ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) { - while (idp->id_free_cnt < IDR_FREE_MAX) { + while (idp->id_free_cnt < MAX_IDR_FREE) { struct idr_layer *new; new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); if (new == NULL) @@ -179,7 +179,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) sh = IDR_BITS*l; id = ((id >> sh) ^ n ^ m) << sh; } - if ((id >= MAX_ID_BIT) || (id < 0)) + if ((id >= MAX_IDR_BIT) || (id < 0)) return IDR_NOMORE_SPACE; if (l == 0) break; @@ -223,7 +223,7 @@ build_up: * Add a new layer to the top of the tree if the requested * id is larger than the currently allocated space. */ - while ((layers < (MAX_LEVEL - 1)) && (id >= (1 << (layers*IDR_BITS)))) { + while ((layers < (MAX_IDR_LEVEL - 1)) && (id >= (1 << (layers*IDR_BITS)))) { layers++; if (!p->count) { /* special case: if the tree is currently empty, @@ -265,7 +265,7 @@ build_up: static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) { - struct idr_layer *pa[MAX_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL]; int id; id = idr_get_empty_slot(idp, starting_id, pa); @@ -357,7 +357,7 @@ static void idr_remove_warning(int id) static void sub_remove(struct idr *idp, int shift, int id) { struct idr_layer *p = idp->top; - struct idr_layer **pa[MAX_LEVEL]; + struct idr_layer **pa[MAX_IDR_LEVEL]; struct idr_layer ***paa = &pa[0]; struct idr_layer *to_free; int n; @@ -402,7 +402,7 @@ void idr_remove(struct idr *idp, int id) struct idr_layer *to_free; /* Mask off upper bits we don't use for the search. */ - id &= MAX_ID_MASK; + id &= MAX_IDR_MASK; sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); if (idp->top && idp->top->count == 1 && (idp->layers > 1) && @@ -420,7 +420,7 @@ void idr_remove(struct idr *idp, int id) to_free->bitmap = to_free->count = 0; free_layer(to_free); } - while (idp->id_free_cnt >= IDR_FREE_MAX) { + while (idp->id_free_cnt >= MAX_IDR_FREE) { p = get_from_free_list(idp); /* * Note: we don't call the rcu callback here, since the only @@ -451,7 +451,7 @@ void idr_remove_all(struct idr *idp) int n, id, max; int bt_mask; struct idr_layer *p; - struct idr_layer *pa[MAX_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; @@ -517,7 +517,7 @@ void *idr_find(struct idr *idp, int id) n = (p->layer+1) * IDR_BITS; /* Mask off upper bits we don't use for the search. */ - id &= MAX_ID_MASK; + id &= MAX_IDR_MASK; if (id >= (1 << n)) return NULL; @@ -555,7 +555,7 @@ int idr_for_each(struct idr *idp, { int n, id, max, error = 0; struct idr_layer *p; - struct idr_layer *pa[MAX_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; @@ -601,7 +601,7 @@ EXPORT_SYMBOL(idr_for_each); */ void *idr_get_next(struct idr *idp, int *nextidp) { - struct idr_layer *p, *pa[MAX_LEVEL]; + struct idr_layer *p, *pa[MAX_IDR_LEVEL]; struct idr_layer **paa = &pa[0]; int id = *nextidp; int n, max; @@ -659,7 +659,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) n = (p->layer+1) * IDR_BITS; - id &= MAX_ID_MASK; + id &= MAX_IDR_MASK; if (id >= (1 << n)) return ERR_PTR(-EINVAL); @@ -780,7 +780,7 @@ EXPORT_SYMBOL(ida_pre_get); */ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) { - struct idr_layer *pa[MAX_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL]; struct ida_bitmap *bitmap; unsigned long flags; int idr_id = starting_id / IDA_BITMAP_BITS; @@ -793,7 +793,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) if (t < 0) return _idr_rc_to_errno(t); - if (t * IDA_BITMAP_BITS >= MAX_ID_BIT) + if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT) return -ENOSPC; if (t != idr_id) @@ -827,7 +827,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) } id = idr_id * IDA_BITMAP_BITS + t; - if (id >= MAX_ID_BIT) + if (id >= MAX_IDR_BIT) return -ENOSPC; __set_bit(t, bitmap->bitmap); -- cgit v1.2.3 From 03a7beb55b9fad363f0dd33e72ccf2d3e1c2a406 Mon Sep 17 00:00:00 2001 From: "Paton J. Lewis" Date: Thu, 4 Oct 2012 17:13:39 -0700 Subject: epoll: support for disabling items, and a self-test app Enhanced epoll_ctl to support EPOLL_CTL_DISABLE, which disables an epoll item. If epoll_ctl doesn't return -EBUSY in this case, it is then safe to delete the epoll item in a multi-threaded environment. Also added a new test_epoll self- test app to both demonstrate the need for this feature and test it. Signed-off-by: Paton J. Lewis Cc: Alexander Viro Cc: Jason Baron Cc: Paul Holland Cc: Davide Libenzi Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 38 +++- include/linux/eventpoll.h | 1 + tools/testing/selftests/Makefile | 2 +- tools/testing/selftests/epoll/Makefile | 11 + tools/testing/selftests/epoll/test_epoll.c | 344 +++++++++++++++++++++++++++++ 5 files changed, 392 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/epoll/Makefile create mode 100644 tools/testing/selftests/epoll/test_epoll.c (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cd96649bfe62..da72250ddc1c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p) /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { - return op != EPOLL_CTL_DEL; + return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; } /* Initialize the poll safe wake up structure */ @@ -676,6 +676,34 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) return 0; } +/* + * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item + * had no event flags set, indicating that another thread may be currently + * handling that item's events (in the case that EPOLLONESHOT was being + * used). Otherwise a zero result indicates that the item has been disabled + * from receiving events. A disabled item may be re-enabled via + * EPOLL_CTL_MOD. Must be called with "mtx" held. + */ +static int ep_disable(struct eventpoll *ep, struct epitem *epi) +{ + int result = 0; + unsigned long flags; + + spin_lock_irqsave(&ep->lock, flags); + if (epi->event.events & ~EP_PRIVATE_BITS) { + if (ep_is_linked(&epi->rdllink)) + list_del_init(&epi->rdllink); + /* Ensure ep_poll_callback will not add epi back onto ready + list: */ + epi->event.events &= EP_PRIVATE_BITS; + } + else + result = -EBUSY; + spin_unlock_irqrestore(&ep->lock, flags); + + return result; +} + static void ep_free(struct eventpoll *ep) { struct rb_node *rbp; @@ -1020,8 +1048,6 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } - - #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate @@ -1787,6 +1813,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } else error = -ENOENT; break; + case EPOLL_CTL_DISABLE: + if (epi) + error = ep_disable(ep, epi); + else + error = -ENOENT; + break; } mutex_unlock(&ep->mtx); diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index f4bb378ccf6a..41085d0f3955 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -25,6 +25,7 @@ #define EPOLL_CTL_ADD 1 #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +#define EPOLL_CTL_DISABLE 4 /* * Request the handling of system wakeup events so as to prevent system suspends diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 85baf11e2acd..43480149119e 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug +TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile new file mode 100644 index 000000000000..19806ed62f50 --- /dev/null +++ b/tools/testing/selftests/epoll/Makefile @@ -0,0 +1,11 @@ +# Makefile for epoll selftests + +all: test_epoll +%: %.c + gcc -pthread -g -o $@ $^ + +run_tests: all + ./test_epoll + +clean: + $(RM) test_epoll diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c new file mode 100644 index 000000000000..e0fcff1e8331 --- /dev/null +++ b/tools/testing/selftests/epoll/test_epoll.c @@ -0,0 +1,344 @@ +/* + * tools/testing/selftests/epoll/test_epoll.c + * + * Copyright 2012 Adobe Systems Incorporated + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Paton J. Lewis + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * A pointer to an epoll_item_private structure will be stored in the epoll + * item's event structure so that we can get access to the epoll_item_private + * data after calling epoll_wait: + */ +struct epoll_item_private { + int index; /* Position of this struct within the epoll_items array. */ + int fd; + uint32_t events; + pthread_mutex_t mutex; /* Guards the following variables... */ + int stop; + int status; /* Stores any error encountered while handling item. */ + /* The following variable allows us to test whether we have encountered + a problem while attempting to cancel and delete the associated + event. When the test program exits, 'deleted' should be exactly + one. If it is greater than one, then the failed test reflects a real + world situation where we would have tried to access the epoll item's + private data after deleting it: */ + int deleted; +}; + +struct epoll_item_private *epoll_items; + +/* + * Delete the specified item from the epoll set. In a real-world secneario this + * is where we would free the associated data structure, but in this testing + * environment we retain the structure so that we can test for double-deletion: + */ +void delete_item(int index) +{ + __sync_fetch_and_add(&epoll_items[index].deleted, 1); +} + +/* + * A pointer to a read_thread_data structure will be passed as the argument to + * each read thread: + */ +struct read_thread_data { + int stop; + int status; /* Indicates any error encountered by the read thread. */ + int epoll_set; +}; + +/* + * The function executed by the read threads: + */ +void *read_thread_function(void *function_data) +{ + struct read_thread_data *thread_data = + (struct read_thread_data *)function_data; + struct epoll_event event_data; + struct epoll_item_private *item_data; + char socket_data; + + /* Handle events until we encounter an error or this thread's 'stop' + condition is set: */ + while (1) { + int result = epoll_wait(thread_data->epoll_set, + &event_data, + 1, /* Number of desired events */ + 1000); /* Timeout in ms */ + if (result < 0) { + /* Breakpoints signal all threads. Ignore that while + debugging: */ + if (errno == EINTR) + continue; + thread_data->status = errno; + return 0; + } else if (thread_data->stop) + return 0; + else if (result == 0) /* Timeout */ + continue; + + /* We need the mutex here because checking for the stop + condition and re-enabling the epoll item need to be done + together as one atomic operation when EPOLL_CTL_DISABLE is + available: */ + item_data = (struct epoll_item_private *)event_data.data.ptr; + pthread_mutex_lock(&item_data->mutex); + + /* Remove the item from the epoll set if we want to stop + handling that event: */ + if (item_data->stop) + delete_item(item_data->index); + else { + /* Clear the data that was written to the other end of + our non-blocking socket: */ + do { + if (read(item_data->fd, &socket_data, 1) < 1) { + if ((errno == EAGAIN) || + (errno == EWOULDBLOCK)) + break; + else + goto error_unlock; + } + } while (item_data->events & EPOLLET); + + /* The item was one-shot, so re-enable it: */ + event_data.events = item_data->events; + if (epoll_ctl(thread_data->epoll_set, + EPOLL_CTL_MOD, + item_data->fd, + &event_data) < 0) + goto error_unlock; + } + + pthread_mutex_unlock(&item_data->mutex); + } + +error_unlock: + thread_data->status = item_data->status = errno; + pthread_mutex_unlock(&item_data->mutex); + return 0; +} + +/* + * A pointer to a write_thread_data structure will be passed as the argument to + * the write thread: + */ +struct write_thread_data { + int stop; + int status; /* Indicates any error encountered by the write thread. */ + int n_fds; + int *fds; +}; + +/* + * The function executed by the write thread. It writes a single byte to each + * socket in turn until the stop condition for this thread is set. If writing to + * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for + * the moment and just move on to the next socket in the list. We don't care + * about the order in which we deliver events to the epoll set. In fact we don't + * care about the data we're writing to the pipes at all; we just want to + * trigger epoll events: + */ +void *write_thread_function(void *function_data) +{ + const char data = 'X'; + int index; + struct write_thread_data *thread_data = + (struct write_thread_data *)function_data; + while (!write_thread_data->stop) + for (index = 0; + !thread_data->stop && (index < thread_data->n_fds); + ++index) + if ((write(thread_data->fds[index], &data, 1) < 1) && + (errno != EAGAIN) && + (errno != EWOULDBLOCK)) { + write_thread_data->status = errno; + return; + } +} + +/* + * Arguments are currently ignored: + */ +int main(int argc, char **argv) +{ + const int n_read_threads = 100; + const int n_epoll_items = 500; + int index; + int epoll_set = epoll_create1(0); + struct write_thread_data write_thread_data = { + 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int)) + }; + struct read_thread_data *read_thread_data = + malloc(n_read_threads * sizeof(struct read_thread_data)); + pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t)); + pthread_t write_thread; + + printf("-----------------\n"); + printf("Runing test_epoll\n"); + printf("-----------------\n"); + + epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private)); + + if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 || + read_thread_data == 0 || read_threads == 0) + goto error; + + if (sysconf(_SC_NPROCESSORS_ONLN) < 2) { + printf("Error: please run this test on a multi-core system.\n"); + goto error; + } + + /* Create the socket pairs and epoll items: */ + for (index = 0; index < n_epoll_items; ++index) { + int socket_pair[2]; + struct epoll_event event_data; + if (socketpair(AF_UNIX, + SOCK_STREAM | SOCK_NONBLOCK, + 0, + socket_pair) < 0) + goto error; + write_thread_data.fds[index] = socket_pair[0]; + epoll_items[index].index = index; + epoll_items[index].fd = socket_pair[1]; + if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0) + goto error; + /* We always use EPOLLONESHOT because this test is currently + structured to demonstrate the need for EPOLL_CTL_DISABLE, + which only produces useful information in the EPOLLONESHOT + case (without EPOLLONESHOT, calling epoll_ctl with + EPOLL_CTL_DISABLE will never return EBUSY). If support for + testing events without EPOLLONESHOT is desired, it should + probably be implemented in a separate unit test. */ + epoll_items[index].events = EPOLLIN | EPOLLONESHOT; + if (index < n_epoll_items / 2) + epoll_items[index].events |= EPOLLET; + epoll_items[index].stop = 0; + epoll_items[index].status = 0; + epoll_items[index].deleted = 0; + event_data.events = epoll_items[index].events; + event_data.data.ptr = &epoll_items[index]; + if (epoll_ctl(epoll_set, + EPOLL_CTL_ADD, + epoll_items[index].fd, + &event_data) < 0) + goto error; + } + + /* Create and start the read threads: */ + for (index = 0; index < n_read_threads; ++index) { + read_thread_data[index].stop = 0; + read_thread_data[index].status = 0; + read_thread_data[index].epoll_set = epoll_set; + if (pthread_create(&read_threads[index], + NULL, + read_thread_function, + &read_thread_data[index]) != 0) + goto error; + } + + if (pthread_create(&write_thread, + NULL, + write_thread_function, + &write_thread_data) != 0) + goto error; + + /* Cancel all event pollers: */ +#ifdef EPOLL_CTL_DISABLE + for (index = 0; index < n_epoll_items; ++index) { + pthread_mutex_lock(&epoll_items[index].mutex); + ++epoll_items[index].stop; + if (epoll_ctl(epoll_set, + EPOLL_CTL_DISABLE, + epoll_items[index].fd, + NULL) == 0) + delete_item(index); + else if (errno != EBUSY) { + pthread_mutex_unlock(&epoll_items[index].mutex); + goto error; + } + /* EBUSY means events were being handled; allow the other thread + to delete the item. */ + pthread_mutex_unlock(&epoll_items[index].mutex); + } +#else + for (index = 0; index < n_epoll_items; ++index) { + pthread_mutex_lock(&epoll_items[index].mutex); + ++epoll_items[index].stop; + pthread_mutex_unlock(&epoll_items[index].mutex); + /* Wait in case a thread running read_thread_function is + currently executing code between epoll_wait and + pthread_mutex_lock with this item. Note that a longer delay + would make double-deletion less likely (at the expense of + performance), but there is no guarantee that any delay would + ever be sufficient. Note also that we delete all event + pollers at once for testing purposes, but in a real-world + environment we are likely to want to be able to cancel event + pollers at arbitrary times. Therefore we can't improve this + situation by just splitting this loop into two loops + (i.e. signal 'stop' for all items, sleep, and then delete all + items). We also can't fix the problem via EPOLL_CTL_DEL + because that command can't prevent the case where some other + thread is executing read_thread_function within the region + mentioned above: */ + usleep(1); + pthread_mutex_lock(&epoll_items[index].mutex); + if (!epoll_items[index].deleted) + delete_item(index); + pthread_mutex_unlock(&epoll_items[index].mutex); + } +#endif + + /* Shut down the read threads: */ + for (index = 0; index < n_read_threads; ++index) + __sync_fetch_and_add(&read_thread_data[index].stop, 1); + for (index = 0; index < n_read_threads; ++index) { + if (pthread_join(read_threads[index], NULL) != 0) + goto error; + if (read_thread_data[index].status) + goto error; + } + + /* Shut down the write thread: */ + __sync_fetch_and_add(&write_thread_data.stop, 1); + if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status) + goto error; + + /* Check for final error conditions: */ + for (index = 0; index < n_epoll_items; ++index) { + if (epoll_items[index].status != 0) + goto error; + if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0) + goto error; + } + for (index = 0; index < n_epoll_items; ++index) + if (epoll_items[index].deleted != 1) { + printf("Error: item data deleted %1d times.\n", + epoll_items[index].deleted); + goto error; + } + + printf("[PASS]\n"); + return 0; + + error: + printf("[FAIL]\n"); + return errno; +} -- cgit v1.2.3 From 6eec482f47a8e8888132b05575dea352187278cb Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 4 Oct 2012 17:13:42 -0700 Subject: binfmt_elf: Uninitialized variable load_elf_interp() has interp_map_addr carefully described as "uninitialized_var" and marked so as to avoid a warning. However if you trace the code it is passed into load_elf_interp and then this value is checked against NULL. As this return value isn't used this is actually safe but it freaks various analysis tools that see un-initialized memory addresses being read before their value is ever defined. Set it to NULL as a matter of programming good taste if nothing else Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0225fddf49b7..7ef5f9fe2729 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -881,7 +881,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } if (elf_interpreter) { - unsigned long uninitialized_var(interp_map_addr); + unsigned long interp_map_addr = 0; elf_entry = load_elf_interp(&loc->interp_elf_ex, interpreter, -- cgit v1.2.3 From 32daab969cc16e263a544d9330972b25a0081851 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Oct 2012 17:14:39 -0700 Subject: hpfs: convert to use leXX_add_cpu() Convert cpu_to_leXX(leXX_to_cpu(E1) + E2) to use leXX_add_cpu(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Cc: Mikulas Patocka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hpfs/anode.c | 6 +++--- fs/hpfs/dnode.c | 28 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 4bae4a4a60b1..2d5b254ad9e2 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -102,7 +102,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi return -1; } if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) { - btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1); + le32_add_cpu(&btree->u.external[n].length, 1); mark_buffer_dirty(bh); brelse(bh); return se; @@ -153,7 +153,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi btree = &anode->btree; } btree->n_free_nodes--; n = btree->n_used_nodes++; - btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12); + le16_add_cpu(&btree->first_free, 12); btree->u.external[n].disk_secno = cpu_to_le32(se); btree->u.external[n].file_secno = cpu_to_le32(fs); btree->u.external[n].length = cpu_to_le32(1); @@ -174,7 +174,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if (btree->n_free_nodes) { btree->n_free_nodes--; n = btree->n_used_nodes++; - btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8); + le16_add_cpu(&btree->first_free, 8); btree->u.internal[n].file_secno = cpu_to_le32(-1); btree->u.internal[n].down = cpu_to_le32(na); btree->u.internal[n-1].file_secno = cpu_to_le32(fs); diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 3228c524ebe5..4364b2a02c5d 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -145,10 +145,10 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno } } if (ptr) { - d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4); + le32_add_cpu(&d->first_free, 4); if (le32_to_cpu(d->first_free) > 2048) { hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self)); - d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4); + le32_add_cpu(&d->first_free, -4); return; } de->length = cpu_to_le16(36); @@ -184,7 +184,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, de->not_8x3 = hpfs_is_name_long(name, namelen); de->namelen = namelen; memcpy(de->name, name, namelen); - d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size); + le32_add_cpu(&d->first_free, d_size); return de; } @@ -314,7 +314,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); de = de_next_de(de); memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de); - nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20)); + le32_add_cpu(&nd->first_free, -((char *)de - (char *)nd - 20)); memcpy(d, nd, le32_to_cpu(nd->first_free)); for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos); fix_up_ptrs(i->i_sb, ad); @@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to) hpfs_brelse4(&qbh); return 0; } - dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); - de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); + le32_add_cpu(&dnode->first_free, -4); + le16_add_cpu(&de->length, -4); de->down = 0; hpfs_mark_4buffers_dirty(&qbh); dno = up; @@ -570,8 +570,8 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p); if (!down) { de->down = 0; - de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); - dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); + le16_add_cpu(&de->length, -4); + le32_add_cpu(&dnode->first_free, -4); memmove(de_next_de(de), (char *)de_next_de(de) + 4, (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de)); } else { @@ -647,14 +647,14 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); printk("HPFS: warning: goin'on\n"); } - del->length = cpu_to_le16(le16_to_cpu(del->length) + 4); + le16_add_cpu(&del->length, 4); del->down = 1; - d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4); + le32_add_cpu(&d1->first_free, 4); } if (dlp && !down) { - del->length = cpu_to_le16(le16_to_cpu(del->length) - 4); + le16_add_cpu(&del->length, -4); del->down = 0; - d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); + le32_add_cpu(&d1->first_free, -4); } else if (down) *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); } else goto endm; @@ -668,9 +668,9 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length)); hpfs_delete_de(i->i_sb, dnode, de_prev); if (!de_prev->down) { - de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4); + le16_add_cpu(&de_prev->length, 4); de_prev->down = 1; - dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); + le32_add_cpu(&dnode->first_free, 4); } *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); hpfs_mark_4buffers_dirty(&qbh); -- cgit v1.2.3 From 4b63709861e431e73f0be6b83f420fdd8fc518f5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 4 Oct 2012 17:14:41 -0700 Subject: fat: use accessor function for msdos_dir_entry 'start' Use accessor function for msdos_dir_entry 'start' Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index dc49ed2cbffa..d70d8f31f704 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1141,10 +1141,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) de[0].ctime_cs = de[1].ctime_cs = 0; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; } - de[0].start = cpu_to_le16(cluster); - de[0].starthi = cpu_to_le16(cluster >> 16); - de[1].start = cpu_to_le16(MSDOS_I(dir)->i_logstart); - de[1].starthi = cpu_to_le16(MSDOS_I(dir)->i_logstart >> 16); + fat_set_start(&de[0], cluster); + fat_set_start(&de[1], MSDOS_I(dir)->i_logstart); de[0].size = de[1].size = 0; memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); set_buffer_uptodate(bhs[0]); -- cgit v1.2.3 From 21b6633d516c4f5d03ec02ede6374e320191003f Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Thu, 4 Oct 2012 17:14:44 -0700 Subject: fat (exportfs): move NFS support code Under memory pressure, the system may evict dentries from cache. When the FAT driver receives a NFS request involving an evicted dentry, it is unable to reconnect it to the filesystem root. This causes the request to fail, often with ENOENT. This is partially due to ineffectiveness of the current FAT NFS implementation, and partially due to an unimplemented fh_to_parent method. The latter can cause file accesses to fail on shares exported with subtree_check. This patch set provides the FAT driver with the ability to reconnect dentries. NFS file handle generation and lookups are simplified and made congruent with ext2. Testing has involved a memory-starved virtual machine running 3.5-rc5 that exports a ~2 GB vfat filesystem containing a kernel tree (~770 MB, ~40000 files, 9 levels). Both 'cp -r' and 'ls -lR' operations were performed from a client, some overlapping, some consecutive. Exports with 'subtree_check' and 'no_subtree_check' have been tested. Note that while this patch set improves FAT's NFS support, it does not eliminate ESTALE errors completely. The following should be considered for NFS clients who are sensitive to ESTALE: * Mounting with lookupcache=none Unfortunately this can degrade performance severely, particularly for deep filesystems. * Incorporating VFS patches to retry ESTALE failures on the client-side, such as https://lkml.org/lkml/2012/6/29/381 * Handling ESTALE errors in client application code This patch: Move NFS-related code into its own C file. No functional changes. Signed-off-by: Steven J. Magnani Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/Makefile | 2 +- fs/fat/fat.h | 22 +++++++++ fs/fat/inode.c | 130 ------------------------------------------------ fs/fat/nfs.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 131 deletions(-) create mode 100644 fs/fat/nfs.c (limited to 'fs') diff --git a/fs/fat/Makefile b/fs/fat/Makefile index e06190322c1c..964b634f6667 100644 --- a/fs/fat/Makefile +++ b/fs/fat/Makefile @@ -6,6 +6,6 @@ obj-$(CONFIG_FAT_FS) += fat.o obj-$(CONFIG_VFAT_FS) += vfat.o obj-$(CONFIG_MSDOS_FS) += msdos.o -fat-y := cache.o dir.o fatent.o file.o inode.o misc.o +fat-y := cache.o dir.o fatent.o file.o inode.o misc.o nfs.o vfat-y := namei_vfat.o msdos-y := namei_msdos.o diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 7d8e0dcac5d5..fb95939ff870 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -341,6 +341,20 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); +static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, + struct inode *inode) +{ + loff_t i_pos; +#if BITS_PER_LONG == 32 + spin_lock(&sbi->inode_hash_lock); +#endif + i_pos = MSDOS_I(inode)->i_pos; +#if BITS_PER_LONG == 32 + spin_unlock(&sbi->inode_hash_lock); +#endif + return i_pos; +} + /* fat/misc.c */ extern __printf(3, 4) __cold void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...); @@ -366,6 +380,14 @@ extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); void fat_cache_destroy(void); +/* fat/nfs.c */ +struct fid; +extern int fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, + struct inode *parent); +extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +extern struct dentry *fat_get_parent(struct dentry *child_dir); + /* helper for printk */ typedef unsigned long long llu; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4e5a6ac54ebd..169f6ebddf96 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -562,20 +562,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, - struct inode *inode) -{ - loff_t i_pos; -#if BITS_PER_LONG == 32 - spin_lock(&sbi->inode_hash_lock); -#endif - i_pos = MSDOS_I(inode)->i_pos; -#if BITS_PER_LONG == 32 - spin_unlock(&sbi->inode_hash_lock); -#endif - return i_pos; -} - static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -668,122 +654,6 @@ static const struct super_operations fat_sops = { .show_options = fat_show_options, }; -/* - * a FAT file handle with fhtype 3 is - * 0/ i_ino - for fast, reliable lookup if still in the cache - * 1/ i_generation - to see if i_ino is still valid - * bit 0 == 0 iff directory - * 2/ i_pos(8-39) - if ino has changed, but still in cache - * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos - * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc - * - * Hack for NFSv2: Maximum FAT entry number is 28bits and maximum - * i_pos is 40bits (blocknr(32) + dir offset(8)), so two 4bits - * of i_logstart is used to store the directory entry offset. - */ - -static struct dentry *fat_fh_to_dentry(struct super_block *sb, - struct fid *fid, int fh_len, int fh_type) -{ - struct inode *inode = NULL; - u32 *fh = fid->raw; - - if (fh_len < 5 || fh_type != 3) - return NULL; - - inode = ilookup(sb, fh[0]); - if (!inode || inode->i_generation != fh[1]) { - if (inode) - iput(inode); - inode = NULL; - } - if (!inode) { - loff_t i_pos; - int i_logstart = fh[3] & 0x0fffffff; - - i_pos = (loff_t)fh[2] << 8; - i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28); - - /* try 2 - see if i_pos is in F-d-c - * require i_logstart to be the same - * Will fail if you truncate and then re-write - */ - - inode = fat_iget(sb, i_pos); - if (inode && MSDOS_I(inode)->i_logstart != i_logstart) { - iput(inode); - inode = NULL; - } - } - - /* - * For now, do nothing if the inode is not found. - * - * What we could do is: - * - * - follow the file starting at fh[4], and record the ".." entry, - * and the name of the fh[2] entry. - * - then follow the ".." file finding the next step up. - * - * This way we build a path to the root of the tree. If this works, we - * lookup the path and so get this inode into the cache. Finally try - * the fat_iget lookup again. If that fails, then we are totally out - * of luck. But all that is for another day - */ - return d_obtain_alias(inode); -} - -static int -fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) -{ - int len = *lenp; - struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - loff_t i_pos; - - if (len < 5) { - *lenp = 5; - return 255; /* no room */ - } - - i_pos = fat_i_pos_read(sbi, inode); - *lenp = 5; - fh[0] = inode->i_ino; - fh[1] = inode->i_generation; - fh[2] = i_pos >> 8; - fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; - fh[4] = (i_pos & 0x0f) << 28; - if (parent) - fh[4] |= MSDOS_I(parent)->i_logstart; - return 3; -} - -static struct dentry *fat_get_parent(struct dentry *child) -{ - struct super_block *sb = child->d_sb; - struct buffer_head *bh; - struct msdos_dir_entry *de; - loff_t i_pos; - struct dentry *parent; - struct inode *inode; - int err; - - lock_super(sb); - - err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); - if (err) { - parent = ERR_PTR(err); - goto out; - } - inode = fat_build_inode(sb, de, i_pos); - brelse(bh); - - parent = d_obtain_alias(inode); -out: - unlock_super(sb); - - return parent; -} - static const struct export_operations fat_export_ops = { .encode_fh = fat_encode_fh, .fh_to_dentry = fat_fh_to_dentry, diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c new file mode 100644 index 000000000000..21609a1e9355 --- /dev/null +++ b/fs/fat/nfs.c @@ -0,0 +1,151 @@ +/* fs/fat/nfs.c + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include "fat.h" + +/* + * a FAT file handle with fhtype 3 is + * 0/ i_ino - for fast, reliable lookup if still in the cache + * 1/ i_generation - to see if i_ino is still valid + * bit 0 == 0 iff directory + * 2/ i_pos(8-39) - if ino has changed, but still in cache + * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos + * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc + * + * Hack for NFSv2: Maximum FAT entry number is 28bits and maximum + * i_pos is 40bits (blocknr(32) + dir offset(8)), so two 4bits + * of i_logstart is used to store the directory entry offset. + */ + +int +fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) +{ + int len = *lenp; + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + loff_t i_pos; + + if (len < 5) { + *lenp = 5; + return 255; /* no room */ + } + + i_pos = fat_i_pos_read(sbi, inode); + *lenp = 5; + fh[0] = inode->i_ino; + fh[1] = inode->i_generation; + fh[2] = i_pos >> 8; + fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; + fh[4] = (i_pos & 0x0f) << 28; + if (parent) + fh[4] |= MSDOS_I(parent)->i_logstart; + return 3; +} + +static int fat_is_valid_fh(int fh_len, int fh_type) +{ + return ((fh_len >= 5) && (fh_type == 3)); +} + +/** + * Map a NFS file handle to a corresponding dentry. + * The dentry may or may not be connected to the filesystem root. + */ +struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct inode *inode = NULL; + u32 *fh = fid->raw; + loff_t i_pos; + unsigned long i_ino; + __u32 i_generation; + int i_logstart; + + if (!fat_is_valid_fh(fh_len, fh_type)) + return NULL; + + i_ino = fh[0]; + i_generation = fh[1]; + i_logstart = fh[3] & 0x0fffffff; + + /* Try i_ino lookup first - fastest and most reliable */ + inode = ilookup(sb, i_ino); + if (inode && (inode->i_generation != i_generation)) { + iput(inode); + inode = NULL; + } + if (!inode) { + i_pos = (loff_t)fh[2] << 8; + i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28); + + /* try 2 - see if i_pos is in F-d-c + * require i_logstart to be the same + * Will fail if you truncate and then re-write + */ + + inode = fat_iget(sb, i_pos); + if (inode && MSDOS_I(inode)->i_logstart != i_logstart) { + iput(inode); + inode = NULL; + } + } + + /* + * For now, do nothing if the inode is not found. + * + * What we could do is: + * + * - follow the file starting at fh[4], and record the ".." entry, + * and the name of the fh[2] entry. + * - then follow the ".." file finding the next step up. + * + * This way we build a path to the root of the tree. If this works, we + * lookup the path and so get this inode into the cache. Finally try + * the fat_iget lookup again. If that fails, then we are totally out + * of luck. But all that is for another day + */ + return d_obtain_alias(inode); +} + +/* + * Find the parent for a directory that is not currently connected to + * the filesystem root. + * + * On entry, the caller holds child_dir->d_inode->i_mutex. + */ +struct dentry *fat_get_parent(struct dentry *child_dir) +{ + struct super_block *sb = child_dir->d_sb; + struct buffer_head *bh = NULL; + struct msdos_dir_entry *de; + loff_t i_pos; + struct dentry *parent; + struct inode *inode; + int err; + + lock_super(sb); + + err = fat_get_dotdot_entry(child_dir->d_inode, &bh, &de, &i_pos); + if (err) { + parent = ERR_PTR(err); + goto out; + } + inode = fat_build_inode(sb, de, i_pos); + + parent = d_obtain_alias(inode); +out: + brelse(bh); + unlock_super(sb); + + return parent; +} -- cgit v1.2.3 From 7669e8fb09da47dd45c07a51394f01031ea81da8 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Thu, 4 Oct 2012 17:14:45 -0700 Subject: fat (exportfs): fix dentry reconnection Maintain an index of directory inodes by starting cluster, so that fat_get_parent() can return the proper cached inode rather than inventing one that cannot be traced back to the filesystem root. Add a new msdos/vfat binary mount option "nfs" so that FAT filesystems that are _not_ exported via NFS are not saddled with maintenance of an index they will never use. Finally, simplify NFS file handle generation and lookups. An ext2-congruent implementation is adequate for FAT needs. Signed-off-by: Steven J. Magnani Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 20 +++---- fs/fat/fat.h | 27 +++++----- fs/fat/inode.c | 71 +++++++++++++++++++++--- fs/fat/namei_msdos.c | 5 +- fs/fat/namei_vfat.c | 5 +- fs/fat/nfs.c | 150 +++++++++++++++++---------------------------------- 6 files changed, 141 insertions(+), 137 deletions(-) (limited to 'fs') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index d70d8f31f704..55e088cc0613 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -872,21 +872,23 @@ static int fat_get_short_entry(struct inode *dir, loff_t *pos, } /* - * The ".." entry can not provide the "struct fat_slot_info" informations - * for inode. So, this function provide the some informations only. + * The ".." entry can not provide the "struct fat_slot_info" information + * for inode, nor a usable i_pos. So, this function provides some information + * only. + * + * Since this function walks through the on-disk inodes within a directory, + * callers are responsible for taking any locks necessary to prevent the + * directory from changing. */ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, - struct msdos_dir_entry **de, loff_t *i_pos) + struct msdos_dir_entry **de) { - loff_t offset; + loff_t offset = 0; - offset = 0; - *bh = NULL; + *de = NULL; while (fat_get_short_entry(dir, &offset, bh, de) >= 0) { - if (!strncmp((*de)->name, MSDOS_DOTDOT, MSDOS_NAME)) { - *i_pos = fat_make_i_pos(dir->i_sb, *bh, *de); + if (!strncmp((*de)->name, MSDOS_DOTDOT, MSDOS_NAME)) return 0; - } } return -ENOENT; } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index fb95939ff870..ec54c3a7f2f7 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,8 @@ struct fat_mount_options { usefree:1, /* Use free_clusters for FAT32 */ tz_utc:1, /* Filesystem timestamps are in UTC */ rodir:1, /* allow ATTR_RO for directory */ - discard:1; /* Issue discard requests on deletions */ + discard:1, /* Issue discard requests on deletions */ + nfs:1; /* Do extra work needed for NFS export */ }; #define FAT_HASH_BITS 8 @@ -88,6 +90,9 @@ struct msdos_sb_info { spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[FAT_HASH_SIZE]; + + spinlock_t dir_hash_lock; + struct hlist_head dir_hashtable[FAT_HASH_SIZE]; }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ @@ -110,6 +115,7 @@ struct msdos_inode_info { int i_attrs; /* unused attribute bits */ loff_t i_pos; /* on-disk position of directory entry or 0 */ struct hlist_node i_fat_hash; /* hash by i_location */ + struct hlist_node i_dir_hash; /* hash by i_logstart */ struct rw_semaphore truncate_lock; /* protect bmap against truncate */ struct inode vfs_inode; }; @@ -262,7 +268,7 @@ extern int fat_subdirs(struct inode *dir); extern int fat_scan(struct inode *dir, const unsigned char *name, struct fat_slot_info *sinfo); extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, - struct msdos_dir_entry **de, loff_t *i_pos); + struct msdos_dir_entry **de); extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, struct fat_slot_info *sinfo); @@ -341,18 +347,9 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); -static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, - struct inode *inode) +static inline unsigned long fat_dir_hash(int logstart) { - loff_t i_pos; -#if BITS_PER_LONG == 32 - spin_lock(&sbi->inode_hash_lock); -#endif - i_pos = MSDOS_I(inode)->i_pos; -#if BITS_PER_LONG == 32 - spin_unlock(&sbi->inode_hash_lock); -#endif - return i_pos; + return hash_32(logstart, FAT_HASH_BITS); } /* fat/misc.c */ @@ -382,10 +379,10 @@ void fat_cache_destroy(void); /* fat/nfs.c */ struct fid; -extern int fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, - struct inode *parent); extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); +extern struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); extern struct dentry *fat_get_parent(struct dentry *child_dir); /* helper for printk */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 169f6ebddf96..5741b11650f1 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -281,15 +281,42 @@ static inline unsigned long fat_hash(loff_t i_pos) return hash_32(i_pos, FAT_HASH_BITS); } +static void dir_hash_init(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int i; + + spin_lock_init(&sbi->dir_hash_lock); + for (i = 0; i < FAT_HASH_SIZE; i++) + INIT_HLIST_HEAD(&sbi->dir_hashtable[i]); +} + void fat_attach(struct inode *inode, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); - spin_lock(&sbi->inode_hash_lock); - MSDOS_I(inode)->i_pos = i_pos; - hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); - spin_unlock(&sbi->inode_hash_lock); + if (inode->i_ino != MSDOS_ROOT_INO) { + struct hlist_head *head = sbi->inode_hashtable + + fat_hash(i_pos); + + spin_lock(&sbi->inode_hash_lock); + MSDOS_I(inode)->i_pos = i_pos; + hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); + spin_unlock(&sbi->inode_hash_lock); + } + + /* If NFS support is enabled, cache the mapping of start cluster + * to directory inode. This is used during reconnection of + * dentries to the filesystem root. + */ + if (S_ISDIR(inode->i_mode) && sbi->options.nfs) { + struct hlist_head *d_head = sbi->dir_hashtable; + d_head += fat_dir_hash(MSDOS_I(inode)->i_logstart); + + spin_lock(&sbi->dir_hash_lock); + hlist_add_head(&MSDOS_I(inode)->i_dir_hash, d_head); + spin_unlock(&sbi->dir_hash_lock); + } } EXPORT_SYMBOL_GPL(fat_attach); @@ -300,6 +327,12 @@ void fat_detach(struct inode *inode) MSDOS_I(inode)->i_pos = 0; hlist_del_init(&MSDOS_I(inode)->i_fat_hash); spin_unlock(&sbi->inode_hash_lock); + + if (S_ISDIR(inode->i_mode) && sbi->options.nfs) { + spin_lock(&sbi->dir_hash_lock); + hlist_del_init(&MSDOS_I(inode)->i_dir_hash); + spin_unlock(&sbi->dir_hash_lock); + } } EXPORT_SYMBOL_GPL(fat_detach); @@ -504,6 +537,7 @@ static void init_once(void *foo) ei->cache_valid_id = FAT_CACHE_VALID + 1; INIT_LIST_HEAD(&ei->cache_lru); INIT_HLIST_NODE(&ei->i_fat_hash); + INIT_HLIST_NODE(&ei->i_dir_hash); inode_init_once(&ei->vfs_inode); } @@ -562,6 +596,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, + struct inode *inode) +{ + loff_t i_pos; +#if BITS_PER_LONG == 32 + spin_lock(&sbi->inode_hash_lock); +#endif + i_pos = MSDOS_I(inode)->i_pos; +#if BITS_PER_LONG == 32 + spin_unlock(&sbi->inode_hash_lock); +#endif + return i_pos; +} + static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -655,8 +703,8 @@ static const struct super_operations fat_sops = { }; static const struct export_operations fat_export_ops = { - .encode_fh = fat_encode_fh, .fh_to_dentry = fat_fh_to_dentry, + .fh_to_parent = fat_fh_to_parent, .get_parent = fat_get_parent, }; @@ -706,6 +754,8 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",usefree"); if (opts->quiet) seq_puts(m, ",quiet"); + if (opts->nfs) + seq_puts(m, ",nfs"); if (opts->showexec) seq_puts(m, ",showexec"); if (opts->sys_immutable) @@ -750,7 +800,7 @@ enum { Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, - Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err, + Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_err, }; static const match_table_t fat_tokens = { @@ -779,6 +829,7 @@ static const match_table_t fat_tokens = { {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, {Opt_discard, "discard"}, + {Opt_nfs, "nfs"}, {Opt_obsolete, "conv=binary"}, {Opt_obsolete, "conv=text"}, {Opt_obsolete, "conv=auto"}, @@ -859,6 +910,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, opts->numtail = 1; opts->usefree = opts->nocase = 0; opts->tz_utc = 0; + opts->nfs = 0; opts->errors = FAT_ERRORS_RO; *debug = 0; @@ -1023,6 +1075,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, case Opt_discard: opts->discard = 1; break; + case Opt_nfs: + opts->nfs = 1; + break; /* obsolete mount options */ case Opt_obsolete: @@ -1313,6 +1368,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, /* set up enough so that it can read an inode */ fat_hash_init(sb); + dir_hash_init(sb); fat_ent_access_init(sb); /* @@ -1367,6 +1423,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, } error = -ENOMEM; insert_inode_hash(root_inode); + fat_attach(root_inode, 0); sb->s_root = d_make_root(root_inode); if (!sb->s_root) { fat_msg(sb, KERN_ERR, "get root inode failed"); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index b0e12bf9f4a1..c27c63020292 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -440,7 +440,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, struct inode *old_inode, *new_inode; struct fat_slot_info old_sinfo, sinfo; struct timespec ts; - loff_t dotdot_i_pos, new_i_pos; + loff_t new_i_pos; int err, old_attrs, is_dir, update_dotdot, corrupt = 0; old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; @@ -456,8 +456,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, is_dir = S_ISDIR(old_inode->i_mode); update_dotdot = (is_dir && old_dir != new_dir); if (update_dotdot) { - if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, - &dotdot_i_pos) < 0) { + if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) { err = -EIO; goto out; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6a6d8c0715a1..e535dd75b986 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -914,7 +914,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode, *new_inode; struct fat_slot_info old_sinfo, sinfo; struct timespec ts; - loff_t dotdot_i_pos, new_i_pos; + loff_t new_i_pos; int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; @@ -929,8 +929,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, is_dir = S_ISDIR(old_inode->i_mode); update_dotdot = (is_dir && old_dir != new_dir); if (update_dotdot) { - if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, - &dotdot_i_pos) < 0) { + if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) { err = -EIO; goto out; } diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index 21609a1e9355..ef4b5faba87b 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -14,47 +14,46 @@ #include #include "fat.h" -/* - * a FAT file handle with fhtype 3 is - * 0/ i_ino - for fast, reliable lookup if still in the cache - * 1/ i_generation - to see if i_ino is still valid - * bit 0 == 0 iff directory - * 2/ i_pos(8-39) - if ino has changed, but still in cache - * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos - * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc - * - * Hack for NFSv2: Maximum FAT entry number is 28bits and maximum - * i_pos is 40bits (blocknr(32) + dir offset(8)), so two 4bits - * of i_logstart is used to store the directory entry offset. +/** + * Look up a directory inode given its starting cluster. */ - -int -fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) +static struct inode *fat_dget(struct super_block *sb, int i_logstart) { - int len = *lenp; - struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - loff_t i_pos; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct hlist_head *head; + struct hlist_node *_p; + struct msdos_inode_info *i; + struct inode *inode = NULL; - if (len < 5) { - *lenp = 5; - return 255; /* no room */ + head = sbi->dir_hashtable + fat_dir_hash(i_logstart); + spin_lock(&sbi->dir_hash_lock); + hlist_for_each_entry(i, _p, head, i_dir_hash) { + BUG_ON(i->vfs_inode.i_sb != sb); + if (i->i_logstart != i_logstart) + continue; + inode = igrab(&i->vfs_inode); + if (inode) + break; } - - i_pos = fat_i_pos_read(sbi, inode); - *lenp = 5; - fh[0] = inode->i_ino; - fh[1] = inode->i_generation; - fh[2] = i_pos >> 8; - fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; - fh[4] = (i_pos & 0x0f) << 28; - if (parent) - fh[4] |= MSDOS_I(parent)->i_logstart; - return 3; + spin_unlock(&sbi->dir_hash_lock); + return inode; } -static int fat_is_valid_fh(int fh_len, int fh_type) +static struct inode *fat_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - return ((fh_len >= 5) && (fh_type == 3)); + struct inode *inode; + + if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO)) + return NULL; + + inode = ilookup(sb, ino); + if (inode && generation && (inode->i_generation != generation)) { + iput(inode); + inode = NULL; + } + + return inode; } /** @@ -64,57 +63,19 @@ static int fat_is_valid_fh(int fh_len, int fh_type) struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - struct inode *inode = NULL; - u32 *fh = fid->raw; - loff_t i_pos; - unsigned long i_ino; - __u32 i_generation; - int i_logstart; - - if (!fat_is_valid_fh(fh_len, fh_type)) - return NULL; - - i_ino = fh[0]; - i_generation = fh[1]; - i_logstart = fh[3] & 0x0fffffff; - - /* Try i_ino lookup first - fastest and most reliable */ - inode = ilookup(sb, i_ino); - if (inode && (inode->i_generation != i_generation)) { - iput(inode); - inode = NULL; - } - if (!inode) { - i_pos = (loff_t)fh[2] << 8; - i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28); - - /* try 2 - see if i_pos is in F-d-c - * require i_logstart to be the same - * Will fail if you truncate and then re-write - */ - - inode = fat_iget(sb, i_pos); - if (inode && MSDOS_I(inode)->i_logstart != i_logstart) { - iput(inode); - inode = NULL; - } - } + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + fat_nfs_get_inode); +} - /* - * For now, do nothing if the inode is not found. - * - * What we could do is: - * - * - follow the file starting at fh[4], and record the ".." entry, - * and the name of the fh[2] entry. - * - then follow the ".." file finding the next step up. - * - * This way we build a path to the root of the tree. If this works, we - * lookup the path and so get this inode into the cache. Finally try - * the fat_iget lookup again. If that fails, then we are totally out - * of luck. But all that is for another day - */ - return d_obtain_alias(inode); +/* + * Find the parent for a file specified by NFS handle. + * This requires that the handle contain the i_ino of the parent. + */ +struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + fat_nfs_get_inode); } /* @@ -128,24 +89,13 @@ struct dentry *fat_get_parent(struct dentry *child_dir) struct super_block *sb = child_dir->d_sb; struct buffer_head *bh = NULL; struct msdos_dir_entry *de; - loff_t i_pos; - struct dentry *parent; - struct inode *inode; - int err; + struct inode *parent_inode = NULL; - lock_super(sb); - - err = fat_get_dotdot_entry(child_dir->d_inode, &bh, &de, &i_pos); - if (err) { - parent = ERR_PTR(err); - goto out; + if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) { + int parent_logstart = fat_get_start(MSDOS_SB(sb), de); + parent_inode = fat_dget(sb, parent_logstart); } - inode = fat_build_inode(sb, de, i_pos); - - parent = d_obtain_alias(inode); -out: brelse(bh); - unlock_super(sb); - return parent; + return d_obtain_alias(parent_inode); } -- cgit v1.2.3 From 85cb9bf535a3f0aac1725541bf5d74363d3e5a65 Mon Sep 17 00:00:00 2001 From: Cruz Julian Bishop Date: Thu, 4 Oct 2012 17:14:47 -0700 Subject: fs/fat: fix a checkpatch issue in namei_msdos.c Add a space before an equals sign/operator in line 410. Signed-off-by: Cruz Julian Bishop Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_msdos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index c27c63020292..c1055e778fff 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -407,7 +407,7 @@ out: static int msdos_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - struct super_block *sb= inode->i_sb; + struct super_block *sb = inode->i_sb; struct fat_slot_info sinfo; int err; -- cgit v1.2.3 From d5a4a3867f64c6f9664a2ec4b23316fd9cb9fe7a Mon Sep 17 00:00:00 2001 From: Cruz Julian Bishop Date: Thu, 4 Oct 2012 17:14:49 -0700 Subject: fs/fat: fix some checkpatch issues in fat.h Mainly fix spacing issues such as "foo * bar" and "foo= bar" Signed-off-by: Cruz Julian Bishop Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index ec54c3a7f2f7..95cb776e497a 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -61,7 +61,7 @@ struct msdos_sb_info { unsigned short sec_per_clus; /* sectors/cluster */ unsigned short cluster_bits; /* log2(cluster_size) */ unsigned int cluster_size; /* cluster size */ - unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ + unsigned char fats, fat_bits; /* number of FATs, FAT bits (12 or 16) */ unsigned short fat_start; unsigned long fat_length; /* FAT start & length (sec.) */ unsigned long dir_start; @@ -328,7 +328,7 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; -extern int fat_setattr(struct dentry * dentry, struct iattr * attr); +extern int fat_setattr(struct dentry *dentry, struct iattr *attr); extern void fat_truncate_blocks(struct inode *inode, loff_t offset); extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ -346,7 +346,7 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, void (*setup)(struct super_block *)); extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2); + struct inode *i2); static inline unsigned long fat_dir_hash(int logstart) { return hash_32(logstart, FAT_HASH_BITS); -- cgit v1.2.3 From 4a3aeb13b774a941823b58715eb4144acedae92a Mon Sep 17 00:00:00 2001 From: Cruz Julian Bishop Date: Thu, 4 Oct 2012 17:14:52 -0700 Subject: fs/fat: chang indentation of some comments in fat.h The comments were not lined up properly, so I just re-indented them. This also fixes a stupid checkpatch issue unknowingly Signed-off-by: Cruz Julian Bishop Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 72 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 95cb776e497a..ca7e8f8bad7c 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -28,27 +28,27 @@ struct fat_mount_options { kgid_t fs_gid; unsigned short fs_fmask; unsigned short fs_dmask; - unsigned short codepage; /* Codepage for shortname conversions */ - char *iocharset; /* Charset used for filename input/display */ - unsigned short shortname; /* flags for shortname display/create rule */ - unsigned char name_check; /* r = relaxed, n = normal, s = strict */ - unsigned char errors; /* On error: continue, panic, remount-ro */ + unsigned short codepage; /* Codepage for shortname conversions */ + char *iocharset; /* Charset used for filename input/display */ + unsigned short shortname; /* flags for shortname display/create rule */ + unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned char errors; /* On error: continue, panic, remount-ro */ unsigned short allow_utime;/* permission for setting the [am]time */ - unsigned quiet:1, /* set = fake successful chmods and chowns */ - showexec:1, /* set = only set x bit for com/exe/bat */ - sys_immutable:1, /* set = system files are immutable */ - dotsOK:1, /* set = hidden and system files are named '.filename' */ - isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ - utf8:1, /* Use of UTF-8 character set (Default) */ - unicode_xlate:1, /* create escape sequences for unhandled Unicode */ - numtail:1, /* Does first alias have a numeric '~1' type tail? */ - flush:1, /* write things quickly */ - nocase:1, /* Does this need case conversion? 0=need case conversion*/ - usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1, /* Filesystem timestamps are in UTC */ - rodir:1, /* allow ATTR_RO for directory */ - discard:1, /* Issue discard requests on deletions */ - nfs:1; /* Do extra work needed for NFS export */ + unsigned quiet:1, /* set = fake successful chmods and chowns */ + showexec:1, /* set = only set x bit for com/exe/bat */ + sys_immutable:1, /* set = system files are immutable */ + dotsOK:1, /* set = hidden and system files are named '.filename' */ + isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ + utf8:1, /* Use of UTF-8 character set (Default) */ + unicode_xlate:1, /* create escape sequences for unhandled Unicode */ + numtail:1, /* Does first alias have a numeric '~1' type tail? */ + flush:1, /* write things quickly */ + nocase:1, /* Does this need case conversion? 0=need case conversion*/ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1, /* Filesystem timestamps are in UTC */ + rodir:1, /* allow ATTR_RO for directory */ + discard:1, /* Issue discard requests on deletions */ + nfs:1; /* Do extra work needed for NFS export */ }; #define FAT_HASH_BITS 8 @@ -58,28 +58,28 @@ struct fat_mount_options { * MS-DOS file system in-core superblock data */ struct msdos_sb_info { - unsigned short sec_per_clus; /* sectors/cluster */ - unsigned short cluster_bits; /* log2(cluster_size) */ - unsigned int cluster_size; /* cluster size */ + unsigned short sec_per_clus; /* sectors/cluster */ + unsigned short cluster_bits; /* log2(cluster_size) */ + unsigned int cluster_size; /* cluster size */ unsigned char fats, fat_bits; /* number of FATs, FAT bits (12 or 16) */ unsigned short fat_start; - unsigned long fat_length; /* FAT start & length (sec.) */ + unsigned long fat_length; /* FAT start & length (sec.) */ unsigned long dir_start; - unsigned short dir_entries; /* root dir start & entries */ - unsigned long data_start; /* first data sector */ - unsigned long max_cluster; /* maximum cluster number */ - unsigned long root_cluster; /* first cluster of the root directory */ - unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ + unsigned short dir_entries; /* root dir start & entries */ + unsigned long data_start; /* first data sector */ + unsigned long max_cluster; /* maximum cluster number */ + unsigned long root_cluster; /* first cluster of the root directory */ + unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ struct mutex fat_lock; - unsigned int prev_free; /* previously allocated cluster number */ - unsigned int free_clusters; /* -1 if undefined */ + unsigned int prev_free; /* previously allocated cluster number */ + unsigned int free_clusters; /* -1 if undefined */ unsigned int free_clus_valid; /* is free_clusters valid? */ struct fat_mount_options options; - struct nls_table *nls_disk; /* Codepage used on disk */ - struct nls_table *nls_io; /* Charset used for input and display */ - const void *dir_ops; /* Opaque; default directory operations */ - int dir_per_block; /* dir entries per block */ - int dir_per_block_bits; /* log2(dir_per_block) */ + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + const void *dir_ops; /* Opaque; default directory operations */ + int dir_per_block; /* dir entries per block */ + int dir_per_block_bits; /* log2(dir_per_block) */ int fatent_shift; struct fatent_operations *fatent_ops; -- cgit v1.2.3 From c90518290ec88a45305a709aae72003a9bc8ad7c Mon Sep 17 00:00:00 2001 From: Cruz Julian Bishop Date: Thu, 4 Oct 2012 17:14:53 -0700 Subject: fs/fat: fix two checkpatch issues in cache.c This does the following: 1: Splits the arguments of a function call to stop it from exceeding 80 characters 2: Re-indents the arguments of another function call to prevent the splitting of a quoted string. Signed-off-by: Cruz Julian Bishop Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 1cc7038e273d..91ad9e1c9441 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -190,7 +190,8 @@ static void __fat_cache_inval_inode(struct inode *inode) struct fat_cache *cache; while (!list_empty(&i->cache_lru)) { - cache = list_entry(i->cache_lru.next, struct fat_cache, cache_list); + cache = list_entry(i->cache_lru.next, + struct fat_cache, cache_list); list_del_init(&cache->cache_list); i->nr_caches--; fat_cache_free(cache); @@ -261,9 +262,10 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) if (nr < 0) goto out; else if (nr == FAT_ENT_FREE) { - fat_fs_error_ratelimit(sb, "%s: invalid cluster chain" - " (i_pos %lld)", __func__, - MSDOS_I(inode)->i_pos); + fat_fs_error_ratelimit(sb, + "%s: invalid cluster chain (i_pos %lld)", + __func__, + MSDOS_I(inode)->i_pos); nr = -EIO; goto out; } else if (nr == FAT_ENT_EOF) { -- cgit v1.2.3 From 3f36f6100a29ef5a48f451d7123d0e57850b1f0f Mon Sep 17 00:00:00 2001 From: Cruz Julian Bishop Date: Thu, 4 Oct 2012 17:14:55 -0700 Subject: fs/fat: fix some small checkpatch issues in dir.c Simply remove the spacing between function definitions and EXPORT_SYMBOL_GPL calls, which were previously generating warnings. Signed-off-by: Cruz Julian Bishop Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 55e088cc0613..69fbe055d86e 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -537,7 +537,6 @@ end_of_dir: return err; } - EXPORT_SYMBOL_GPL(fat_search_long); struct fat_ioctl_filldir_callback { @@ -892,7 +891,6 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, } return -ENOENT; } - EXPORT_SYMBOL_GPL(fat_get_dotdot_entry); /* See if directory is empty */ @@ -915,7 +913,6 @@ int fat_dir_empty(struct inode *dir) brelse(bh); return result; } - EXPORT_SYMBOL_GPL(fat_dir_empty); /* @@ -961,7 +958,6 @@ int fat_scan(struct inode *dir, const unsigned char *name, } return -ENOENT; } - EXPORT_SYMBOL_GPL(fat_scan); static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) @@ -1049,7 +1045,6 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) return 0; } - EXPORT_SYMBOL_GPL(fat_remove_entries); static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, @@ -1161,7 +1156,6 @@ error_free: error: return err; } - EXPORT_SYMBOL_GPL(fat_alloc_new_dir); static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, @@ -1377,5 +1371,4 @@ error_remove: __fat_remove_entries(dir, pos, free_slots); return err; } - EXPORT_SYMBOL_GPL(fat_add_entries); -- cgit v1.2.3 From f08b4988f229fb41a4995829dc0db34c5e35dfbb Mon Sep 17 00:00:00 2001 From: Cruz Julian Bishop Date: Thu, 4 Oct 2012 17:14:57 -0700 Subject: fs/fat: fix all other checkpatch issues in dir.c 1: Import linux/uaccess.h instead of asm.uaccess.h 2: Stop any lines going over 80 characters 3: Stopped setting any variables in if statements 4: Stopped splitting quoted strings 5: Removed unneeded parentheses Signed-off-by: Cruz Julian Bishop Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 69fbe055d86e..bca6d0a1255e 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include "fat.h" @@ -123,7 +123,8 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos, { /* Fast stuff first */ if (*bh && *de && - (*de - (struct msdos_dir_entry *)(*bh)->b_data) < MSDOS_SB(dir->i_sb)->dir_per_block - 1) { + (*de - (struct msdos_dir_entry *)(*bh)->b_data) < + MSDOS_SB(dir->i_sb)->dir_per_block - 1) { *pos += sizeof(struct msdos_dir_entry); (*de)++; return 0; @@ -155,7 +156,8 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii, while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { ec = *ip++; - if ((charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { + charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE); + if (charlen > 0) { op += charlen; len -= charlen; } else { @@ -172,12 +174,12 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii, } if (unlikely(*ip)) { - fat_msg(sb, KERN_WARNING, "filename was truncated while " - "converting."); + fat_msg(sb, KERN_WARNING, + "filename was truncated while converting."); } *op = 0; - return (op - ascii); + return op - ascii; } static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni, @@ -205,7 +207,8 @@ fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) } static inline int -fat_short2lower_uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) +fat_short2lower_uni(struct nls_table *t, unsigned char *c, + int clen, wchar_t *uni) { int charlen; wchar_t wc; @@ -220,7 +223,8 @@ fat_short2lower_uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *un if (!nc) nc = *c; - if ( (charlen = t->char2uni(&nc, 1, uni)) < 0) { + charlen = t->char2uni(&nc, 1, uni); + if (charlen < 0) { *uni = 0x003f; /* a question mark */ charlen = 1; } @@ -573,7 +577,8 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, /* Fake . and .. for the root directory. */ if (inode->i_ino == MSDOS_ROOT_INO) { while (cpos < 2) { - if (filldir(dirent, "..", cpos+1, cpos, MSDOS_ROOT_INO, DT_DIR) < 0) + if (filldir(dirent, "..", cpos+1, cpos, + MSDOS_ROOT_INO, DT_DIR) < 0) goto out; cpos++; filp->f_pos++; -- cgit v1.2.3 From 441dff34aa39e1597bdf2fdd07fe83be56528b32 Mon Sep 17 00:00:00 2001 From: Cruz Julian Bishop Date: Thu, 4 Oct 2012 17:14:59 -0700 Subject: fs/fat: fix checkpatch issues in fatent.c 1: Stop any lines going over 80 characters 2: Remove a blank line before EXPORT_SYMBOL_GPL Signed-off-by: Cruz Julian Bishop Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fatent.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 31f08ab62c56..67fd23744720 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -203,15 +203,18 @@ static int fat12_ent_next(struct fat_entry *fatent) fatent->entry++; if (fatent->nr_bhs == 1) { - WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 2))); - WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); + WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + + (bhs[0]->b_size - 2))); + WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + + (bhs[0]->b_size - 1))); if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) { ent12_p[0] = nextp - 1; ent12_p[1] = nextp; return 1; } } else { - WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); + WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + + (bhs[0]->b_size - 1))); WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data); ent12_p[0] = nextp - 1; ent12_p[1] = nextp; @@ -631,7 +634,6 @@ error: return err; } - EXPORT_SYMBOL_GPL(fat_free_clusters); /* 128kb is the whole sectors for FAT12 and FAT16 */ -- cgit v1.2.3 From 126ac0518c71ba0e54e599dc129b76027a7c2d23 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 4 Oct 2012 17:15:02 -0700 Subject: fat: no need to reset EOF in ent_put for FAT32 #define FAT_ENT_EOF(EOF_FAT32) there is no need to reset value of 'new' for FAT32 as the values is already correct Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fatent.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 67fd23744720..260705c58062 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -186,9 +186,6 @@ static void fat16_ent_put(struct fat_entry *fatent, int new) static void fat32_ent_put(struct fat_entry *fatent, int new) { - if (new == FAT_ENT_EOF) - new = EOF_FAT32; - WARN_ON(new & 0xf0000000); new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; *fatent->u.ent32_p = cpu_to_le32(new); -- cgit v1.2.3 From 14864655c0f1fafe690c6a44086e83a4ac08b6f6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 4 Oct 2012 17:15:04 -0700 Subject: fat: simplify writeback_inode() [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5741b11650f1..76f60c642c06 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1463,18 +1463,14 @@ static int writeback_inode(struct inode *inode) { int ret; - struct address_space *mapping = inode->i_mapping; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = 0, - }; - /* if we used WB_SYNC_ALL, sync_inode waits for the io for the - * inode to finish. So WB_SYNC_NONE is sent down to sync_inode + + /* if we used wait=1, sync_inode_metadata waits for the io for the + * inode to finish. So wait=0 is sent down to sync_inode_metadata * and filemap_fdatawrite is used for the data blocks */ - ret = sync_inode(inode, &wbc); + ret = sync_inode_metadata(inode, 0); if (!ret) - ret = filemap_fdatawrite(mapping); + ret = filemap_fdatawrite(inode->i_mapping); return ret; } -- cgit v1.2.3 From 046d662f481830e652ac34cd112249adde16452a Mon Sep 17 00:00:00 2001 From: Alex Kelly Date: Thu, 4 Oct 2012 17:15:23 -0700 Subject: coredump: make core dump functionality optional Adds an expert Kconfig option, CONFIG_COREDUMP, which allows disabling of core dump. This saves approximately 2.6k in the compiled kernel, and complements CONFIG_ELF_CORE, which now depends on it. CONFIG_COREDUMP also disables coredump-related sysctls, except for suid_dumpable and related functions, which are necessary for ptrace. [akpm@linux-foundation.org: fix binfmt_aout.c build] Signed-off-by: Alex Kelly Reviewed-by: Josh Triplett Acked-by: Serge Hallyn Acked-by: Kees Cook Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig.binfmt | 8 ++++++++ fs/Makefile | 3 ++- fs/binfmt_aout.c | 52 +++++++++++++++++++++++++------------------------ include/linux/binfmts.h | 4 ++++ init/Kconfig | 1 + kernel/sysctl.c | 12 +++++++++++- 6 files changed, 53 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 022574202749..0efd1524b977 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -164,3 +164,11 @@ config BINFMT_MISC You may say M here for module support and later load the module when you have use for it; the module is called binfmt_misc. If you don't know what to answer at this point, say Y. + +config COREDUMP + bool "Enable core dump support" if EXPERT + default y + help + This option enables support for performing core dumps. You almost + certainly want to say Y here. Not necessary on systems that never + need debugging or only ever run flawless code. diff --git a/fs/Makefile b/fs/Makefile index 8938f8250320..1d7af79288a0 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o coredump.o + stack.o fs_struct.o statfs.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o @@ -48,6 +48,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o +obj-$(CONFIG_COREDUMP) += coredump.o obj-$(CONFIG_FHANDLE) += fhandle.o diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index d146e181d10d..4b5b5117f00a 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -32,31 +32,8 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); -static int aout_core_dump(struct coredump_params *cprm); - -static struct linux_binfmt aout_format = { - .module = THIS_MODULE, - .load_binary = load_aout_binary, - .load_shlib = load_aout_library, - .core_dump = aout_core_dump, - .min_coredump = PAGE_SIZE -}; - -#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) - -static int set_brk(unsigned long start, unsigned long end) -{ - start = PAGE_ALIGN(start); - end = PAGE_ALIGN(end); - if (end > start) { - unsigned long addr; - addr = vm_brk(start, end - start); - if (BAD_ADDR(addr)) - return addr; - } - return 0; -} +#ifdef CONFIG_COREDUMP /* * Routine writes a core dump image in the current directory. * Currently only a stub-function. @@ -66,7 +43,6 @@ static int set_brk(unsigned long start, unsigned long end) * field, which also makes sure the core-dumps won't be recursive if the * dumping of the process results in another error.. */ - static int aout_core_dump(struct coredump_params *cprm) { struct file *file = cprm->file; @@ -135,6 +111,32 @@ end_coredump: set_fs(fs); return has_dumped; } +#else +#define aout_core_dump NULL +#endif + +static struct linux_binfmt aout_format = { + .module = THIS_MODULE, + .load_binary = load_aout_binary, + .load_shlib = load_aout_library, + .core_dump = aout_core_dump, + .min_coredump = PAGE_SIZE +}; + +#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) + +static int set_brk(unsigned long start, unsigned long end) +{ + start = PAGE_ALIGN(start); + end = PAGE_ALIGN(end); + if (end > start) { + unsigned long addr; + addr = vm_brk(start, end - start); + if (BAD_ADDR(addr)) + return addr; + } + return 0; +} /* * create_aout_tables() parses the env- and arg-strings in new user diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 366422bc1633..00e2e8908953 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -132,7 +132,11 @@ extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); +#ifdef CONFIG_COREDUMP extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); +#else +static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {} +#endif extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); diff --git a/init/Kconfig b/init/Kconfig index e82f289290fa..ed6334dd5e71 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1199,6 +1199,7 @@ config BUG Just say Y. config ELF_CORE + depends on COREDUMP default y bool "Enable ELF core dumps" if EXPERT help diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 84c76a34e41c..c2a2f8084bad 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -97,10 +97,12 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern int max_threads; -extern int core_uses_pid; extern int suid_dumpable; +#ifdef CONFIG_COREDUMP +extern int core_uses_pid; extern char core_pattern[]; extern unsigned int core_pipe_limit; +#endif extern int pid_max; extern int min_free_kbytes; extern int pid_max_min, pid_max_max; @@ -177,8 +179,10 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_COREDUMP static int proc_dostring_coredump(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#endif #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ @@ -404,6 +408,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_COREDUMP { .procname = "core_uses_pid", .data = &core_uses_pid, @@ -425,6 +430,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -2036,12 +2042,14 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, static void validate_coredump_safety(void) { +#ifdef CONFIG_COREDUMP if (suid_dumpable == SUID_DUMPABLE_SAFE && core_pattern[0] != '/' && core_pattern[0] != '|') { printk(KERN_WARNING "Unsafe core_pattern used with "\ "suid_dumpable=2. Pipe handler or fully qualified "\ "core dump path required.\n"); } +#endif } static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, @@ -2053,6 +2061,7 @@ static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, return error; } +#ifdef CONFIG_COREDUMP static int proc_dostring_coredump(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2061,6 +2070,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, validate_coredump_safety(); return error; } +#endif static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, void __user *buffer, -- cgit v1.2.3 From 179899fd5dc780fe3bcd44d0eb7823e3d855c855 Mon Sep 17 00:00:00 2001 From: Alex Kelly Date: Thu, 4 Oct 2012 17:15:24 -0700 Subject: coredump: update coredump-related headers Create a new header file, fs/coredump.h, which contains functions only used by the new coredump.c. It also moves do_coredump to the include/linux/coredump.h header file, for consistency. Signed-off-by: Alex Kelly Reviewed-by: Josh Triplett Acked-by: Serge Hallyn Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 2 ++ fs/coredump.h | 6 ++++++ fs/exec.c | 1 + include/linux/binfmts.h | 5 ----- include/linux/coredump.h | 5 +++++ include/linux/sched.h | 1 - kernel/signal.c | 1 + 7 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 fs/coredump.h (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index f045bbad6822..c01aa7b9ab5d 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include "internal.h" +#include "coredump.h" #include diff --git a/fs/coredump.h b/fs/coredump.h new file mode 100644 index 000000000000..e39ff072110d --- /dev/null +++ b/fs/coredump.h @@ -0,0 +1,6 @@ +#ifndef _FS_COREDUMP_H +#define _FS_COREDUMP_H + +extern int __get_dumpable(unsigned long mm_flags); + +#endif diff --git a/fs/exec.c b/fs/exec.c index 48fb26ef8a1b..bd941a1cdbfc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -63,6 +63,7 @@ #include #include "internal.h" +#include "coredump.h" #include diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 00e2e8908953..c7b16ee71415 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -132,11 +132,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); -#ifdef CONFIG_COREDUMP -extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); -#else -static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {} -#endif extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index ba4b85a6d9b8..42f9752a0a40 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -11,5 +11,10 @@ */ extern int dump_write(struct file *file, const void *addr, int nr); extern int dump_seek(struct file *file, loff_t off); +#ifdef CONFIG_COREDUMP +extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); +#else +static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {} +#endif #endif /* _LINUX_COREDUMP_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 9d51e260bde0..9c5612f0374b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -405,7 +405,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -extern int __get_dumpable(unsigned long mm_flags); /* get/set_dumpable() values */ #define SUID_DUMPABLE_DISABLED 0 diff --git a/kernel/signal.c b/kernel/signal.c index 2c681f11b7d2..2ad3f5904bd7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 12a2b4b2241e318b4f6df31228e4272d2c2968a1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Oct 2012 17:15:25 -0700 Subject: coredump: add support for %d=__get_dumpable() in core name Some coredump handlers want to create a core file in a way compatible with standard behavior. Standard behavior with fs.suid_dumpable = 2 is to create core file with uid=gid=0. However, there was no way for coredump handler to know that the process being dumped was suid'ed. This patch adds the new %d specifier for format_corename() which simply reports __get_dumpable(mm->flags), this is compatible with /proc/sys/fs/suid_dumpable we already have. Addresses https://bugzilla.redhat.com/show_bug.cgi?id=787135 Developed during a discussion with Denys Vlasenko. Signed-off-by: Oleg Nesterov Cc: Denys Vlasenko Cc: Alex Kelly Cc: Andi Kleen Cc: Cong Wang Cc: Jiri Moskovcak Acked-by: Neil Horman Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 2 ++ fs/coredump.c | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6d78841fd416..2907ba6c3607 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -181,6 +181,8 @@ core_pattern is used to specify a core dumpfile pattern name. %p pid %u uid %g gid + %d dump mode, matches PR_SET_DUMPABLE and + /proc/sys/fs/suid_dumpable %s signal number %t UNIX time of dump %h hostname diff --git a/fs/coredump.c b/fs/coredump.c index c01aa7b9ab5d..4fce06fc3b56 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -149,7 +149,7 @@ put_exe_file: * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(struct core_name *cn, long signr) +static int format_corename(struct core_name *cn, struct coredump_params *cprm) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; @@ -194,9 +194,13 @@ static int format_corename(struct core_name *cn, long signr) case 'g': err = cn_printf(cn, "%d", cred->gid); break; + case 'd': + err = cn_printf(cn, "%d", + __get_dumpable(cprm->mm_flags)); + break; /* signal that caused the coredump */ case 's': - err = cn_printf(cn, "%ld", signr); + err = cn_printf(cn, "%ld", cprm->signr); break; /* UNIX time of coredump */ case 't': { @@ -515,7 +519,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) */ clear_thread_flag(TIF_SIGPENDING); - ispipe = format_corename(&cn, signr); + ispipe = format_corename(&cn, &cprm); if (ispipe) { int dump_count; -- cgit v1.2.3 From 0f4cfb2e4e7a7e4e97a3e90e2ba1062f07fb2cb1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Oct 2012 17:15:27 -0700 Subject: coredump: use SUID_DUMPABLE_ENABLED rather than hardcoded 1 Cosmetic. Change setup_new_exec() and task_dumpable() to use SUID_DUMPABLE_ENABLED for /bin/grep. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- fs/proc/internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index bd941a1cdbfc..9824473a7ec1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1097,7 +1097,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) - set_dumpable(current->mm, 1); + set_dumpable(current->mm, SUID_DUMPABLE_ENABLED); else set_dumpable(current->mm, suid_dumpable); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 67925a7bd8cb..cceaab07ad54 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -103,7 +103,7 @@ static inline int task_dumpable(struct task_struct *task) if (mm) dumpable = get_dumpable(mm); task_unlock(task); - if(dumpable == 1) + if (dumpable == SUID_DUMPABLE_ENABLED) return 1; return 0; } -- cgit v1.2.3 From 5ab1c309b344880d81494e9eab7fb27682bc6d9d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 4 Oct 2012 17:15:29 -0700 Subject: coredump: pass siginfo_t* to do_coredump() and below, not merely signr This is a preparatory patch for the introduction of NT_SIGINFO elf note. With this patch we pass "siginfo_t *siginfo" instead of "int signr" to do_coredump() and put it into coredump_params. It will be used by the next patch. Most changes are simple s/signr/siginfo->si_signo/. Signed-off-by: Denys Vlasenko Reviewed-by: Oleg Nesterov Cc: Amerigo Wang Cc: "Jonathan M. Foote" Cc: Roland McGrath Cc: Pedro Alves Cc: Fengguang Wu Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_aout.c | 2 +- fs/binfmt_elf.c | 14 +++++++------- fs/binfmt_elf_fdpic.c | 6 +++--- fs/binfmt_flat.c | 2 +- fs/coredump.c | 10 +++++----- include/linux/binfmts.h | 2 +- include/linux/coredump.h | 4 ++-- kernel/signal.c | 2 +- 8 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 4b5b5117f00a..0e7a6f81ae36 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -65,7 +65,7 @@ static int aout_core_dump(struct coredump_params *cprm) current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); dump.u_ar0 = offsetof(struct user, regs); - dump.signal = cprm->signr; + dump.signal = cprm->siginfo->si_signo; aout_dump_thread(cprm->regs, &dump); /* If the size of the dump file exceeds the rlimit, then see what would happen diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7ef5f9fe2729..4450e82a05aa 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1480,7 +1480,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - long signr, struct pt_regs *regs) + siginfo_t *siginfo, struct pt_regs *regs) { struct task_struct *dump_task = current; const struct user_regset_view *view = task_user_regset_view(dump_task); @@ -1550,7 +1550,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, * Now fill in each thread's information. */ for (t = info->thread; t != NULL; t = t->next) - if (!fill_thread_core_info(t, view, signr, &info->size)) + if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size)) return 0; /* @@ -1713,14 +1713,14 @@ static int elf_note_info_init(struct elf_note_info *info) static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - long signr, struct pt_regs *regs) + siginfo_t *siginfo, struct pt_regs *regs) { struct list_head *t; if (!elf_note_info_init(info)) return 0; - if (signr) { + if (siginfo->si_signo) { struct core_thread *ct; struct elf_thread_status *ets; @@ -1738,13 +1738,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, int sz; ets = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(signr, ets); + sz = elf_dump_thread_status(siginfo->si_signo, ets); info->thread_status_size += sz; } } /* now collect the dump for the current */ memset(info->prstatus, 0, sizeof(*info->prstatus)); - fill_prstatus(info->prstatus, current, signr); + fill_prstatus(info->prstatus, current, siginfo->si_signo); elf_core_copy_regs(&info->prstatus->pr_reg, regs); /* Set up header */ @@ -1951,7 +1951,7 @@ static int elf_core_dump(struct coredump_params *cprm) * Collect all the non-memory information about the process for the * notes. This also sets up the file header. */ - if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs)) + if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs)) goto cleanup; has_dumped = 1; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 3d77cf81ba3c..08d812b32282 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto cleanup; #endif - if (cprm->signr) { + if (cprm->siginfo->si_signo) { struct core_thread *ct; struct elf_thread_status *tmp; @@ -1661,13 +1661,13 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) int sz; tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(cprm->signr, tmp); + sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); thread_status_size += sz; } } /* now collect the dump for the current */ - fill_prstatus(prstatus, current, cprm->signr); + fill_prstatus(prstatus, current, cprm->siginfo->si_signo); elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); segs = current->mm->map_count; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 178cb70acc26..e280352b28f9 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -107,7 +107,7 @@ static struct linux_binfmt flat_format = { static int flat_core_dump(struct coredump_params *cprm) { printk("Process %s:%d received signr %d and should have core dumped\n", - current->comm, current->pid, (int) cprm->signr); + current->comm, current->pid, (int) cprm->siginfo->si_signo); return(1); } diff --git a/fs/coredump.c b/fs/coredump.c index 4fce06fc3b56..fd37facac8dc 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -200,7 +200,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) break; /* signal that caused the coredump */ case 's': - err = cn_printf(cn, "%ld", cprm->signr); + err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); break; /* UNIX time of coredump */ case 't': { @@ -457,7 +457,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) return 0; } -void do_coredump(long signr, int exit_code, struct pt_regs *regs) +void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) { struct core_state core_state; struct core_name cn; @@ -472,7 +472,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) bool need_nonrelative = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { - .signr = signr, + .siginfo = siginfo, .regs = regs, .limit = rlimit(RLIMIT_CORE), /* @@ -483,7 +483,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) .mm_flags = mm->flags, }; - audit_core_dumps(signr); + audit_core_dumps(siginfo->si_signo); binfmt = mm->binfmt; if (!binfmt || !binfmt->core_dump) @@ -507,7 +507,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) need_nonrelative = true; } - retval = coredump_wait(exit_code, &core_state); + retval = coredump_wait(siginfo->si_signo, &core_state); if (retval < 0) goto fail_creds; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c7b16ee71415..37935c2d2e8f 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -72,7 +72,7 @@ struct linux_binprm { /* Function parameter for binfmt->coredump */ struct coredump_params { - long signr; + siginfo_t *siginfo; struct pt_regs *regs; struct file *file; unsigned long limit; diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 42f9752a0a40..1775eb8acc03 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -12,9 +12,9 @@ extern int dump_write(struct file *file, const void *addr, int nr); extern int dump_seek(struct file *file, loff_t off); #ifdef CONFIG_COREDUMP -extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); +extern void do_coredump(siginfo_t *siginfo, struct pt_regs *regs); #else -static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {} +static inline void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) {} #endif #endif /* _LINUX_COREDUMP_H */ diff --git a/kernel/signal.c b/kernel/signal.c index 2ad3f5904bd7..0af8868525d6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2360,7 +2360,7 @@ relock: * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ - do_coredump(info->si_signo, info->si_signo, regs); + do_coredump(info, regs); } /* -- cgit v1.2.3 From 49ae4d4b113be03dc4a2ec5f2a1f573ff0fcddb3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 4 Oct 2012 17:15:35 -0700 Subject: coredump: add a new elf note with siginfo of the signal Existing PRSTATUS note contains only si_signo, si_code, si_errno fields from the siginfo of the signal which caused core to be dumped. There are tools which try to analyze crashes for possible security implications, and they want to use, among other data, si_addr field from the SIGSEGV. This patch adds a new elf note, NT_SIGINFO, which contains the complete siginfo_t of the signal which killed the process. Signed-off-by: Denys Vlasenko Reviewed-by: Oleg Nesterov Cc: Amerigo Wang Cc: "Jonathan M. Foote" Cc: Roland McGrath Cc: Pedro Alves Cc: Fengguang Wu Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 27 +++++++++++++++++++++++++-- fs/compat_binfmt_elf.c | 6 ++++++ include/linux/elf.h | 5 +++++ 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 4450e82a05aa..865f9be6a2d3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -37,6 +37,10 @@ #include #include +#ifndef user_siginfo_t +#define user_siginfo_t siginfo_t +#endif + static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, @@ -1372,6 +1376,16 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); } +static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, + siginfo_t *siginfo) +{ + mm_segment_t old_fs = get_fs(); + set_fs(KERNEL_DS); + copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo); + set_fs(old_fs); + fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); +} + #ifdef CORE_DUMP_USE_REGSET #include @@ -1385,7 +1399,9 @@ struct elf_thread_core_info { struct elf_note_info { struct elf_thread_core_info *thread; struct memelfnote psinfo; + struct memelfnote signote; struct memelfnote auxv; + user_siginfo_t csigdata; size_t size; int thread_notes; }; @@ -1559,6 +1575,9 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); info->size += notesize(&info->psinfo); + fill_siginfo_note(&info->signote, &info->csigdata, siginfo); + info->size += notesize(&info->signote); + fill_auxv_note(&info->auxv, current->mm); info->size += notesize(&info->auxv); @@ -1588,6 +1607,8 @@ static int write_note_info(struct elf_note_info *info, if (first && !writenote(&info->psinfo, file, foffset)) return 0; + if (first && !writenote(&info->signote, file, foffset)) + return 0; if (first && !writenote(&info->auxv, file, foffset)) return 0; @@ -1681,6 +1702,7 @@ struct elf_note_info { #ifdef ELF_CORE_COPY_XFPREGS elf_fpxregset_t *xfpu; #endif + user_siginfo_t csigdata; int thread_status_size; int numnote; }; @@ -1690,8 +1712,8 @@ static int elf_note_info_init(struct elf_note_info *info) memset(info, 0, sizeof(*info)); INIT_LIST_HEAD(&info->thread_list); - /* Allocate space for six ELF notes */ - info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); + /* Allocate space for ELF notes */ + info->notes = kmalloc(7 * sizeof(struct memelfnote), GFP_KERNEL); if (!info->notes) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); @@ -1763,6 +1785,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->numnote = 2; + fill_siginfo_note(&info->notes[info->numnote++], &info->csigdata, siginfo); fill_auxv_note(&info->notes[info->numnote++], current->mm); /* Try to dump the FPU. */ diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 112e45a17e99..0fbcf6347437 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -37,6 +37,12 @@ #define elf_note elf32_note #define elf_addr_t Elf32_Addr +/* + * Some data types as stored in coredump. + */ +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_user copy_siginfo_to_user32 + /* * The machine-dependent core note format types are defined in elfcore-compat.h, * which requires asm/elf.h to define compat_elf_gregset_t et al. diff --git a/include/linux/elf.h b/include/linux/elf.h index 0a05051a8924..dc62da7447ca 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -372,6 +372,11 @@ typedef struct elf64_shdr { #define NT_PRPSINFO 3 #define NT_TASKSTRUCT 4 #define NT_AUXV 6 +/* + * Note to userspace developers: size of NT_SIGINFO note may increase + * in the future to accomodate more fields, don't assume it is fixed! + */ +#define NT_SIGINFO 0x53494749 #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ #define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ #define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ -- cgit v1.2.3 From 2aa362c49c314a98fb9aebbd7760a461667bac05 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 4 Oct 2012 17:15:36 -0700 Subject: coredump: extend core dump note section to contain file names of mapped files This note has the following format: long count -- how many files are mapped long page_size -- units for file_ofs array of [COUNT] elements of long start long end long file_ofs followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... Signed-off-by: Denys Vlasenko Cc: Oleg Nesterov Cc: Amerigo Wang Cc: "Jonathan M. Foote" Cc: Roland McGrath Cc: Pedro Alves Cc: Fengguang Wu Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/compat_binfmt_elf.c | 1 + include/linux/elf.h | 1 + 3 files changed, 108 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 865f9be6a2d3..28a64e769527 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,9 @@ #include #include +#ifndef user_long_t +#define user_long_t long +#endif #ifndef user_siginfo_t #define user_siginfo_t siginfo_t #endif @@ -1386,6 +1390,93 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); } +#define MAX_FILE_NOTE_SIZE (4*1024*1024) +/* + * Format of NT_FILE note: + * + * long count -- how many files are mapped + * long page_size -- units for file_ofs + * array of [COUNT] elements of + * long start + * long end + * long file_ofs + * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... + */ +static void fill_files_note(struct memelfnote *note) +{ + struct vm_area_struct *vma; + unsigned count, size, names_ofs, remaining, n; + user_long_t *data; + user_long_t *start_end_ofs; + char *name_base, *name_curpos; + + /* *Estimated* file count and total data size needed */ + count = current->mm->map_count; + size = count * 64; + + names_ofs = (2 + 3 * count) * sizeof(data[0]); + alloc: + if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ + goto err; + size = round_up(size, PAGE_SIZE); + data = vmalloc(size); + if (!data) + goto err; + + start_end_ofs = data + 2; + name_base = name_curpos = ((char *)data) + names_ofs; + remaining = size - names_ofs; + count = 0; + for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { + struct file *file; + const char *filename; + + file = vma->vm_file; + if (!file) + continue; + filename = d_path(&file->f_path, name_curpos, remaining); + if (IS_ERR(filename)) { + if (PTR_ERR(filename) == -ENAMETOOLONG) { + vfree(data); + size = size * 5 / 4; + goto alloc; + } + continue; + } + + /* d_path() fills at the end, move name down */ + /* n = strlen(filename) + 1: */ + n = (name_curpos + remaining) - filename; + remaining = filename - name_curpos; + memmove(name_curpos, filename, n); + name_curpos += n; + + *start_end_ofs++ = vma->vm_start; + *start_end_ofs++ = vma->vm_end; + *start_end_ofs++ = vma->vm_pgoff; + count++; + } + + /* Now we know exact count of files, can store it */ + data[0] = count; + data[1] = PAGE_SIZE; + /* + * Count usually is less than current->mm->map_count, + * we need to move filenames down. + */ + n = current->mm->map_count - count; + if (n != 0) { + unsigned shift_bytes = n * 3 * sizeof(data[0]); + memmove(name_base - shift_bytes, name_base, + name_curpos - name_base); + name_curpos -= shift_bytes; + } + + size = name_curpos - (char *)data; + fill_note(note, "CORE", NT_FILE, size, data); + err: ; +} + #ifdef CORE_DUMP_USE_REGSET #include @@ -1401,6 +1492,7 @@ struct elf_note_info { struct memelfnote psinfo; struct memelfnote signote; struct memelfnote auxv; + struct memelfnote files; user_siginfo_t csigdata; size_t size; int thread_notes; @@ -1581,6 +1673,9 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_auxv_note(&info->auxv, current->mm); info->size += notesize(&info->auxv); + fill_files_note(&info->files); + info->size += notesize(&info->files); + return 1; } @@ -1611,6 +1706,8 @@ static int write_note_info(struct elf_note_info *info, return 0; if (first && !writenote(&info->auxv, file, foffset)) return 0; + if (first && !writenote(&info->files, file, foffset)) + return 0; for (i = 1; i < info->thread_notes; ++i) if (t->notes[i].data && @@ -1637,6 +1734,7 @@ static void free_note_info(struct elf_note_info *info) kfree(t); } kfree(info->psinfo.data); + vfree(info->files.data); } #else @@ -1713,7 +1811,7 @@ static int elf_note_info_init(struct elf_note_info *info) INIT_LIST_HEAD(&info->thread_list); /* Allocate space for ELF notes */ - info->notes = kmalloc(7 * sizeof(struct memelfnote), GFP_KERNEL); + info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL); if (!info->notes) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); @@ -1783,10 +1881,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_note(info->notes + 1, "CORE", NT_PRPSINFO, sizeof(*info->psinfo), info->psinfo); - info->numnote = 2; + fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); + fill_auxv_note(info->notes + 3, current->mm); + fill_files_note(info->notes + 4); - fill_siginfo_note(&info->notes[info->numnote++], &info->csigdata, siginfo); - fill_auxv_note(&info->notes[info->numnote++], current->mm); + info->numnote = 5; /* Try to dump the FPU. */ info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, @@ -1848,6 +1947,9 @@ static void free_note_info(struct elf_note_info *info) kfree(list_entry(tmp, struct elf_thread_status, list)); } + /* Free data allocated by fill_files_note(): */ + vfree(info->notes[4].data); + kfree(info->prstatus); kfree(info->psinfo); kfree(info->notes); diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 0fbcf6347437..a81147e2e4ef 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -40,6 +40,7 @@ /* * Some data types as stored in coredump. */ +#define user_long_t compat_long_t #define user_siginfo_t compat_siginfo_t #define copy_siginfo_to_user copy_siginfo_to_user32 diff --git a/include/linux/elf.h b/include/linux/elf.h index dc62da7447ca..59ef40650e1e 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -377,6 +377,7 @@ typedef struct elf64_shdr { * in the future to accomodate more fields, don't assume it is fixed! */ #define NT_SIGINFO 0x53494749 +#define NT_FILE 0x46494c45 #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ #define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ #define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ -- cgit v1.2.3 From 620727506dc6da0562fa4f6950dedb8a51bd8237 Mon Sep 17 00:00:00 2001 From: yan Date: Thu, 4 Oct 2012 17:15:38 -0700 Subject: proc: return -ENOMEM when inode allocation failed If proc_get_inode() returns NULL then presumably it encountered memory exhaustion. proc_lookup_de() should return -ENOMEM in this case, not -EINVAL. Signed-off-by: yan Cc: Ryan Mallon Cc: Cong Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index b3647fe6a608..9e8f63164309 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -427,7 +427,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { pde_get(de); spin_unlock(&proc_subdir_lock); - error = -EINVAL; + error = -ENOMEM; inode = proc_get_inode(dir->i_sb, de); goto out_unlock; } -- cgit v1.2.3 From 0e06936057674ef3f2bd0b398a32ddc512642992 Mon Sep 17 00:00:00 2001 From: yan Date: Thu, 4 Oct 2012 17:15:41 -0700 Subject: proc: no need to initialize proc_inode->fd in proc_get_inode() proc_get_inode() obtains the inode via a call to iget_locked(). iget_locked() calls alloc_inode() which will call proc_alloc_inode() which clears proc_inode.fd, so there is no need to clear this field in proc_get_inode(). If iget_locked() instead found the inode via find_inode_fast(), that inode will not have I_NEW set so this change has no effect. Signed-off-by: yan Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7ac817b64a71..3b22bbdee9ec 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -450,7 +450,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) return NULL; if (inode->i_state & I_NEW) { inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - PROC_I(inode)->fd = 0; PROC_I(inode)->pde = de; if (de->mode) { -- cgit v1.2.3 From 17baa2a2c40f2f0330d25819b589a515d36b2d40 Mon Sep 17 00:00:00 2001 From: yan Date: Thu, 4 Oct 2012 17:15:43 -0700 Subject: proc: use kzalloc instead of kmalloc and memset Part of the memory will be written twice after this change, but that should be negligible. [akpm@linux-foundation.org: fix __proc_create() coding-style issues, remove unneeded zero-initialisations] Signed-off-by: yan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 9e8f63164309..0d80cef4cfb9 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -605,7 +605,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, unsigned int len; /* make sure name is valid */ - if (!name || !strlen(name)) goto out; + if (!name || !strlen(name)) + goto out; if (xlate_proc_name(name, parent, &fn) != 0) goto out; @@ -616,20 +617,18 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, len = strlen(fn); - ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); - if (!ent) goto out; + ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); + if (!ent) + goto out; - memset(ent, 0, sizeof(struct proc_dir_entry)); memcpy(ent->name, fn, len + 1); ent->namelen = len; ent->mode = mode; ent->nlink = nlink; atomic_set(&ent->count, 1); - ent->pde_users = 0; spin_lock_init(&ent->pde_unload_lock); - ent->pde_unload_completion = NULL; INIT_LIST_HEAD(&ent->pde_openers); - out: +out: return ent; } -- cgit v1.2.3 From ab4a1f247003ad146c1a1067b2ba043922d0dff2 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Thu, 4 Oct 2012 17:15:45 -0700 Subject: proc_sysctl.c: use BUG_ON instead of BUG The use of if (!head) BUG(); can be replaced with the single line BUG_ON(!head). Signed-off-by: Prasad Joshi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index eb7cc91b7258..dcd56f84db7e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -266,8 +266,7 @@ void sysctl_head_put(struct ctl_table_header *head) static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) { - if (!head) - BUG(); + BUG_ON(!head); spin_lock(&sysctl_lock); if (!use_table(head)) head = ERR_PTR(-ENOENT); -- cgit v1.2.3 From 9fb88442105f5fce96ea343927500d6307c8c550 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Thu, 4 Oct 2012 17:15:46 -0700 Subject: fs/proc/root.c: use NULL instead of 0 for pointer This cleanup also fixes the following sparse warning: fs/proc/root.c:64:45: warning: Using plain integer as NULL pointer Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/root.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/root.c b/fs/proc/root.c index 9a2d9fd7cadd..9889a92d2e01 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -61,7 +61,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) if (!*p) continue; - args[0].to = args[0].from = 0; + args[0].to = args[0].from = NULL; token = match_token(p, tokens, args); switch (token) { case Opt_gid: -- cgit v1.2.3 From c99b6841d74a5c7d3698cc2a3ec44241fe64b769 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Oct 2012 17:16:53 -0700 Subject: omfs: convert to use beXX_add_cpu() Convert cpu_to_beXX(beXX_to_cpu(E1) + E2) to use beXX_add_cpu(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Acked-by: Bob Copeland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/omfs/file.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 2c6d95257a4d..77e3cb2962b4 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -146,8 +146,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe, be64_to_cpu(entry->e_blocks); if (omfs_allocate_block(inode->i_sb, new_block)) { - entry->e_blocks = - cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1); + be64_add_cpu(&entry->e_blocks, 1); terminator->e_blocks = ~(cpu_to_be64( be64_to_cpu(~terminator->e_blocks) + 1)); goto out; @@ -177,7 +176,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe, be64_to_cpu(~terminator->e_blocks) + (u64) new_count)); /* write in new entry */ - oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count)); + be32_add_cpu(&oe->e_extent_count, 1); out: *ret_block = new_block; -- cgit v1.2.3 From d5bbd43d5f450c3fca058f5b85f3dfb4e8cc88c9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 8 Oct 2012 19:13:01 +0200 Subject: exec: make de_thread() killable Change de_thread() to use KILLABLE rather than UNINTERRUPTIBLE while waiting for other threads. The only complication is that we should clear ->group_exit_task and ->notify_count before we return, and we should do this under tasklist_lock. -EAGAIN is used to match the initial signal_group_exit() check/return, it doesn't really matter. This fixes the (unlikely) race with coredump. de_thread() checks signal_group_exit() before it starts to kill the subthreads, but this can't help if another CLONE_VM (but non CLONE_THREAD) task starts the coredumping after de_thread() unlocks ->siglock. In this case the killed sub-thread can block in exit_mm() waiting for coredump_finish(), execing thread waits for that sub-thead, and the coredumping thread waits for execing thread. Deadlock. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/exec.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 9824473a7ec1..19f4fb80cd17 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -878,9 +878,11 @@ static int de_thread(struct task_struct *tsk) sig->notify_count--; while (sig->notify_count) { - __set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_KILLABLE); spin_unlock_irq(lock); schedule(); + if (unlikely(__fatal_signal_pending(tsk))) + goto killed; spin_lock_irq(lock); } spin_unlock_irq(lock); @@ -898,9 +900,11 @@ static int de_thread(struct task_struct *tsk) write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) break; - __set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); schedule(); + if (unlikely(__fatal_signal_pending(tsk))) + goto killed; } /* @@ -994,6 +998,14 @@ no_thread_group: BUG_ON(!thread_group_leader(tsk)); return 0; + +killed: + /* protects against exit_notify() and __exit_signal() */ + read_lock(&tasklist_lock); + sig->group_exit_task = NULL; + sig->notify_count = 0; + read_unlock(&tasklist_lock); + return -EAGAIN; } char *get_task_comm(char *buf, struct task_struct *tsk) -- cgit v1.2.3 From 121977187ca0a7f20b848530deb04cc56167769b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Oct 2012 23:21:58 +0100 Subject: Fix F_DUPFD_CLOEXEC breakage Fix a braino in F_DUPFD_CLOEXEC; f_dupfd() expects flags for alloc_fd(), get_unused_fd() etc and there clone-on-exec if O_CLOEXEC, not FD_CLOEXEC. Reported-by: Richard W.M. Jones Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/fcntl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 8f704291d4ed..71a600a19f06 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -258,7 +258,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, err = f_dupfd(arg, filp, 0); break; case F_DUPFD_CLOEXEC: - err = f_dupfd(arg, filp, FD_CLOEXEC); + err = f_dupfd(arg, filp, O_CLOEXEC); break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; -- cgit v1.2.3 From 0b173bc4daa8f8ec03a85abf5e47b23502ff80af Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 8 Oct 2012 16:28:46 -0700 Subject: mm: kill vma flag VM_CAN_NONLINEAR Move actual pte filling for non-linear file mappings into the new special vma operation: ->remap_pages(). Filesystems must implement this method to get non-linear mapping support, if it uses filemap_fault() then generic_file_remap_pages() can be used. Now device drivers can implement this method and obtain nonlinear vma support. Signed-off-by: Konstantin Khlebnikov Cc: Alexander Viro Cc: Carsten Otte Cc: Chris Metcalf #arch/tile Cc: Cyrill Gorcunov Cc: Eric Paris Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Morris Cc: Jason Baron Cc: Kentaro Takeda Cc: Matt Helsley Cc: Nick Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Robert Richter Cc: Suresh Siddha Cc: Tetsuo Handa Cc: Venkatesh Pallipadi Acked-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/android/ashmem.c | 1 - fs/9p/vfs_file.c | 1 + fs/btrfs/file.c | 2 +- fs/ceph/addr.c | 2 +- fs/cifs/file.c | 1 + fs/ext4/file.c | 2 +- fs/fuse/file.c | 1 + fs/gfs2/file.c | 2 +- fs/nfs/file.c | 1 + fs/nilfs2/file.c | 2 +- fs/ocfs2/mmap.c | 2 +- fs/ubifs/file.c | 1 + fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 ++ include/linux/mm.h | 7 ++++--- mm/filemap.c | 2 +- mm/filemap_xip.c | 3 ++- mm/fremap.c | 14 ++++++++------ mm/mmap.c | 3 +-- mm/nommu.c | 8 ++++++++ mm/shmem.c | 3 +-- 21 files changed, 39 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 94a740d2883d..634b9ae713e0 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -332,7 +332,6 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); vma->vm_file = asma->file; - vma->vm_flags |= VM_CAN_NONLINEAR; out: mutex_unlock(&ashmem_mutex); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index dd6f7ee1e312..c2483e97beee 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -738,6 +738,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data, static const struct vm_operations_struct v9fs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = v9fs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5caf285c6e4d..f6b40e86121b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1599,6 +1599,7 @@ out: static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = btrfs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1610,7 +1611,6 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) file_accessed(filp); vma->vm_ops = &btrfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 22b6e4583faa..6690269f5dde 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1224,6 +1224,7 @@ out: static struct vm_operations_struct ceph_vmops = { .fault = filemap_fault, .page_mkwrite = ceph_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int ceph_mmap(struct file *file, struct vm_area_struct *vma) @@ -1234,6 +1235,5 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &ceph_vmops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7d7bbdc4c8e7..edb25b4bbb95 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3003,6 +3003,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static struct vm_operations_struct cifs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = cifs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ca6f07afe601..bf3966bccd34 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -207,6 +207,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ext4_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) @@ -217,7 +218,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &ext4_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index aba15f1b7ad2..78d2837bc940 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1379,6 +1379,7 @@ static const struct vm_operations_struct fuse_file_vm_ops = { .close = fuse_vma_close, .fault = filemap_fault, .page_mkwrite = fuse_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 30e21997a1a1..0def0504afc1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -492,6 +492,7 @@ out: static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .page_mkwrite = gfs2_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; /** @@ -526,7 +527,6 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) return error; } vma->vm_ops = &gfs2_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6a7fcab7ecb3..f692be97676d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -578,6 +578,7 @@ out: static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nfs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nfs_need_sync_write(struct file *filp, struct inode *inode) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 5b387a4c293e..16f35f7423c5 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -135,13 +135,13 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nilfs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &nilfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index d150372fd81d..47a87dda54ce 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -173,6 +173,7 @@ out: static const struct vm_operations_struct ocfs2_file_vm_ops = { .fault = ocfs2_fault, .page_mkwrite = ocfs2_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) @@ -188,7 +189,6 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); out: vma->vm_ops = &ocfs2_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index ff48c5a85309..5bc77817f382 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1536,6 +1536,7 @@ out_unlock: static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ubifs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1eaeb8be3aae..aa473fa640a2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -940,7 +940,6 @@ xfs_file_mmap( struct vm_area_struct *vma) { vma->vm_ops = &xfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; file_accessed(filp); return 0; @@ -1443,4 +1442,5 @@ const struct file_operations xfs_dir_file_operations = { static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = xfs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index ca6d8c806f47..5a8a273d5b2f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2552,6 +2552,8 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, + unsigned long size, pgoff_t pgoff); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); diff --git a/include/linux/mm.h b/include/linux/mm.h index fb0685b17914..44d3fc25f556 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -105,7 +105,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ #define VM_NODUMP 0x04000000 /* Do not include in the core dump */ -#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ @@ -171,8 +170,7 @@ extern pgprot_t protection_map[16]; * of VM_FAULT_xxx flags that give details about how the fault was handled. * * pgoff should be used in favour of virtual_address, if possible. If pgoff - * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear - * mapping support. + * is used, one may implement ->remap_pages to get nonlinear mapping support. */ struct vm_fault { unsigned int flags; /* FAULT_FLAG_xxx flags */ @@ -230,6 +228,9 @@ struct vm_operations_struct { int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, const nodemask_t *to, unsigned long flags); #endif + /* called by sys_remap_file_pages() to populate non-linear mapping */ + int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, + unsigned long size, pgoff_t pgoff); }; struct mmu_gather; diff --git a/mm/filemap.c b/mm/filemap.c index 384344575c37..a9827b42556e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1737,6 +1737,7 @@ EXPORT_SYMBOL(filemap_page_mkwrite); const struct vm_operations_struct generic_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = filemap_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; /* This is used for a general mmap of a disk file */ @@ -1749,7 +1750,6 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &generic_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 13e013b1270c..91750227a191 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -305,6 +305,7 @@ out: static const struct vm_operations_struct xip_file_vm_ops = { .fault = xip_file_fault, .page_mkwrite = filemap_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int xip_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -313,7 +314,7 @@ int xip_file_mmap(struct file * file, struct vm_area_struct * vma) file_accessed(file); vma->vm_ops = &xip_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR | VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; return 0; } EXPORT_SYMBOL_GPL(xip_file_mmap); diff --git a/mm/fremap.c b/mm/fremap.c index 048659c0c03d..3d731a498788 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -5,6 +5,7 @@ * * started by Ingo Molnar, Copyright (C) 2002, 2003 */ +#include #include #include #include @@ -80,9 +81,10 @@ out: return err; } -static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long size, pgoff_t pgoff) +int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, + unsigned long size, pgoff_t pgoff) { + struct mm_struct *mm = vma->vm_mm; int err; do { @@ -95,9 +97,9 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, pgoff++; } while (size); - return 0; - + return 0; } +EXPORT_SYMBOL(generic_file_remap_pages); /** * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma @@ -167,7 +169,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) goto out; - if (!(vma->vm_flags & VM_CAN_NONLINEAR)) + if (!vma->vm_ops->remap_pages) goto out; if (start < vma->vm_start || start + size > vma->vm_end) @@ -228,7 +230,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, } mmu_notifier_invalidate_range_start(mm, start, start + size); - err = populate_range(mm, vma, start, size, pgoff); + err = vma->vm_ops->remap_pages(vma, start, size, pgoff); mmu_notifier_invalidate_range_end(mm, start, start + size); if (!err && !(flags & MAP_NONBLOCK)) { if (vma->vm_flags & VM_LOCKED) { diff --git a/mm/mmap.c b/mm/mmap.c index b0989f4d4f09..d0686d355113 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -669,8 +669,7 @@ again: remove_next = 1 + (end > next->vm_end); static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { - /* VM_CAN_NONLINEAR may get set later by f_op->mmap() */ - if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR) + if (vma->vm_flags ^ vm_flags) return 0; if (vma->vm_file != file) return 0; diff --git a/mm/nommu.c b/mm/nommu.c index dee2ff89fd58..98318dcff742 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1961,6 +1961,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } EXPORT_SYMBOL(filemap_fault); +int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, + unsigned long size, pgoff_t pgoff) +{ + BUG(); + return 0; +} +EXPORT_SYMBOL(generic_file_remap_pages); + static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { diff --git a/mm/shmem.c b/mm/shmem.c index d3752110c8c7..cc12072f8787 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1339,7 +1339,6 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &shmem_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } @@ -2643,6 +2642,7 @@ static const struct vm_operations_struct shmem_vm_ops = { .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, #endif + .remap_pages = generic_file_remap_pages, }; static struct dentry *shmem_mount(struct file_system_type *fs_type, @@ -2836,7 +2836,6 @@ int shmem_zero_setup(struct vm_area_struct *vma) fput(vma->vm_file); vma->vm_file = file; vma->vm_ops = &shmem_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } -- cgit v1.2.3 From 0103bd16fb90bc741c7a03fd1ea4e8a505abad23 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 8 Oct 2012 16:28:59 -0700 Subject: mm: prepare VM_DONTDUMP for using in drivers Rename VM_NODUMP into VM_DONTDUMP: this name matches other negative flags: VM_DONTEXPAND, VM_DONTCOPY. Currently this flag used only for sys_madvise. The next patch will use it for replacing the outdated flag VM_RESERVED. Also forbid madvise(MADV_DODUMP) for special kernel mappings VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) Signed-off-by: Konstantin Khlebnikov Cc: Alexander Viro Cc: Carsten Otte Cc: Chris Metcalf Cc: Cyrill Gorcunov Cc: Eric Paris Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Morris Cc: Jason Baron Cc: Kentaro Takeda Cc: Matt Helsley Cc: Nick Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Robert Richter Cc: Suresh Siddha Cc: Tetsuo Handa Cc: Venkatesh Pallipadi Acked-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- include/linux/mm.h | 2 +- mm/madvise.c | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 28a64e769527..2b72d26e2e4b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1123,7 +1123,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (always_dump_vma(vma)) goto whole; - if (vma->vm_flags & VM_NODUMP) + if (vma->vm_flags & VM_DONTDUMP) return 0; /* Hugetlb memory check */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 25ef49c1f2bd..dc08d558e058 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -102,7 +102,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ -#define VM_NODUMP 0x04000000 /* Do not include in the core dump */ +#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ diff --git a/mm/madvise.c b/mm/madvise.c index 14d260fa0d17..03dfa5c7adb3 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -69,10 +69,14 @@ static long madvise_behavior(struct vm_area_struct * vma, new_flags &= ~VM_DONTCOPY; break; case MADV_DONTDUMP: - new_flags |= VM_NODUMP; + new_flags |= VM_DONTDUMP; break; case MADV_DODUMP: - new_flags &= ~VM_NODUMP; + if (new_flags & VM_SPECIAL) { + error = -EINVAL; + goto out; + } + new_flags &= ~VM_DONTDUMP; break; case MADV_MERGEABLE: case MADV_UNMERGEABLE: -- cgit v1.2.3 From 314e51b9851b4f4e8ab302243ff5a6fc6147f379 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 8 Oct 2012 16:29:02 -0700 Subject: mm: kill vma flag VM_RESERVED and mm->reserved_vm counter A long time ago, in v2.4, VM_RESERVED kept swapout process off VMA, currently it lost original meaning but still has some effects: | effect | alternative flags -+------------------------+--------------------------------------------- 1| account as reserved_vm | VM_IO 2| skip in core dump | VM_IO, VM_DONTDUMP 3| do not merge or expand | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP 4| do not mlock | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP This patch removes reserved_vm counter from mm_struct. Seems like nobody cares about it, it does not exported into userspace directly, it only reduces total_vm showed in proc. Thus VM_RESERVED can be replaced with VM_IO or pair VM_DONTEXPAND | VM_DONTDUMP. remap_pfn_range() and io_remap_pfn_range() set VM_IO|VM_DONTEXPAND|VM_DONTDUMP. remap_vmalloc_range() set VM_DONTEXPAND | VM_DONTDUMP. [akpm@linux-foundation.org: drivers/vfio/pci/vfio_pci.c fixup] Signed-off-by: Konstantin Khlebnikov Cc: Alexander Viro Cc: Carsten Otte Cc: Chris Metcalf Cc: Cyrill Gorcunov Cc: Eric Paris Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Morris Cc: Jason Baron Cc: Kentaro Takeda Cc: Matt Helsley Cc: Nick Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Robert Richter Cc: Suresh Siddha Cc: Tetsuo Handa Cc: Venkatesh Pallipadi Acked-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/unevictable-lru.txt | 4 ++-- arch/alpha/kernel/pci-sysfs.c | 2 +- arch/ia64/kernel/perfmon.c | 2 +- arch/ia64/mm/init.c | 3 ++- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/sparc/kernel/pci.c | 2 +- arch/unicore32/kernel/process.c | 2 +- arch/x86/xen/mmu.c | 3 +-- drivers/char/mbcs.c | 2 +- drivers/char/mem.c | 2 +- drivers/char/mspec.c | 2 +- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/drm_vm.c | 10 ++-------- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- drivers/gpu/drm/gma500/framebuffer.c | 3 +-- drivers/gpu/drm/ttm/ttm_bo_vm.c | 4 ++-- drivers/gpu/drm/udl/udl_fb.c | 2 +- drivers/infiniband/hw/ehca/ehca_uverbs.c | 4 ++-- drivers/infiniband/hw/ipath/ipath_file_ops.c | 2 +- drivers/infiniband/hw/qib/qib_file_ops.c | 2 +- drivers/media/pci/meye/meye.c | 2 +- drivers/media/platform/omap/omap_vout.c | 2 +- drivers/media/platform/vino.c | 2 +- drivers/media/usb/sn9c102/sn9c102_core.c | 3 +-- drivers/media/usb/usbvision/usbvision-video.c | 3 +-- drivers/media/v4l2-core/videobuf-dma-sg.c | 2 +- drivers/media/v4l2-core/videobuf-vmalloc.c | 2 +- drivers/media/v4l2-core/videobuf2-memops.c | 2 +- drivers/misc/carma/carma-fpga.c | 2 -- drivers/misc/sgi-gru/grufile.c | 5 ++--- drivers/mtd/mtdchar.c | 2 +- drivers/scsi/sg.c | 2 +- drivers/staging/omapdrm/omap_gem_dmabuf.c | 2 +- drivers/staging/tidspbridge/rmgr/drv_interface.c | 2 +- drivers/uio/uio.c | 4 +--- drivers/usb/mon/mon_bin.c | 2 +- drivers/video/68328fb.c | 2 +- drivers/video/aty/atyfb_base.c | 3 +-- drivers/video/fb-puv3.c | 3 +-- drivers/video/fb_defio.c | 2 +- drivers/video/fbmem.c | 3 +-- drivers/video/gbefb.c | 2 +- drivers/video/omap2/omapfb/omapfb-main.c | 2 +- drivers/video/sbuslib.c | 5 ++--- drivers/video/smscufx.c | 1 - drivers/video/udlfb.c | 1 - drivers/video/vermilion/vermilion.c | 1 - drivers/video/vfb.c | 1 - drivers/xen/gntalloc.c | 2 +- drivers/xen/gntdev.c | 2 +- drivers/xen/privcmd.c | 3 ++- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/proc/task_mmu.c | 2 +- include/linux/mempolicy.h | 2 +- include/linux/mm.h | 3 +-- include/linux/mm_types.h | 1 - kernel/events/core.c | 2 +- mm/ksm.c | 3 +-- mm/memory.c | 11 +++++------ mm/mlock.c | 2 +- mm/mmap.c | 2 -- mm/nommu.c | 2 +- mm/vmalloc.c | 3 +-- security/selinux/selinuxfs.c | 2 +- sound/core/pcm_native.c | 6 +++--- sound/usb/usx2y/us122l.c | 2 +- sound/usb/usx2y/usX2Yhwdep.c | 2 +- sound/usb/usx2y/usx2yhwdeppcm.c | 2 +- 70 files changed, 77 insertions(+), 105 deletions(-) (limited to 'fs') diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt index fa206cccf89f..323ff5dba1cc 100644 --- a/Documentation/vm/unevictable-lru.txt +++ b/Documentation/vm/unevictable-lru.txt @@ -371,8 +371,8 @@ mlock_fixup() filters several classes of "special" VMAs: mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to allocate the huge pages and populate the ptes. -3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of - kernel pages, such as the VDSO page, relay channel pages, etc. These pages +3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages, + such as the VDSO page, relay channel pages, etc. These pages are inherently unevictable and are not managed on the LRU lists. mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls make_pages_present() to populate the ptes. diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 53649c7d0068..b51f7b4818cd 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -26,7 +26,7 @@ static int hose_mmap_page_range(struct pci_controller *hose, base = sparse ? hose->sparse_io_base : hose->dense_io_base; vma->vm_pgoff += base >> PAGE_SHIFT; - vma->vm_flags |= (VM_IO | VM_RESERVED); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f388b4e18a37..ea39eba61ef5 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2307,7 +2307,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t */ vma->vm_mm = mm; vma->vm_file = get_file(filp); - vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; + vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ /* diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 0eab454867a2..082e383c1b6f 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -138,7 +138,8 @@ ia64_init_addr_space (void) vma->vm_mm = current->mm; vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); - vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; + vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | + VM_DONTEXPAND | VM_DONTDUMP; down_write(¤t->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(¤t->mm->mmap_sem); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 83e929e66f9d..721d4603a235 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1183,7 +1183,7 @@ static const struct vm_operations_struct kvm_rma_vm_ops = { static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) { - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &kvm_rma_vm_ops; return 0; } diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index acc8c838ff72..75b31bcdeadf 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -779,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev, static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state) { - vma->vm_flags |= (VM_IO | VM_RESERVED); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; } /* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index b6f0458c3143..b008586dad75 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -380,7 +380,7 @@ int vectors_user_mapping(void) return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | - VM_RESERVED, + VM_DONTEXPAND | VM_DONTDUMP, NULL); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5a16824cc2b3..fd28d86fe3d2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2451,8 +2451,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == - (VM_PFNMAP | VM_RESERVED | VM_IO))); + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c index 0c7d340b9ab9..f74e892711dd 100644 --- a/drivers/char/mbcs.c +++ b/drivers/char/mbcs.c @@ -507,7 +507,7 @@ static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ + /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, __pa(soft->gscr_addr) >> PAGE_SHIFT, diff --git a/drivers/char/mem.c b/drivers/char/mem.c index e5eedfa24c91..0537903c985b 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -322,7 +322,7 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &mmap_mem_ops; - /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ + /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index 845f97fd1832..e1f60f968fdd 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -286,7 +286,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, atomic_set(&vdata->refcnt, 1); vma->vm_private_data = vdata; - vma->vm_flags |= (VM_IO | VM_RESERVED | VM_PFNMAP | VM_DONTEXPAND); + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; if (vdata->type == MSPEC_FETCHOP || vdata->type == MSPEC_UNCACHED) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); vma->vm_ops = &mspec_vm_ops; diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 92177d5aedee..24efae464e2c 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -706,7 +706,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) goto out_unlock; } - vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = map->handle; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 23a824e6a22a..db7bd292410b 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -514,8 +514,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &drm_vm_dma_ops; - vma->vm_flags |= VM_RESERVED; /* Don't swap */ - vma->vm_flags |= VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; drm_vm_open_locked(dev, vma); return 0; @@ -643,21 +642,16 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) case _DRM_SHM: vma->vm_ops = &drm_vm_shm_ops; vma->vm_private_data = (void *)map; - /* Don't let this area swap. Change when - DRM_KERNEL advisory is supported. */ - vma->vm_flags |= VM_RESERVED; break; case _DRM_SCATTER_GATHER: vma->vm_ops = &drm_vm_sg_ops; vma->vm_private_data = (void *)map; - vma->vm_flags |= VM_RESERVED; vma->vm_page_prot = drm_dma_prot(map->type, vma); break; default: return -EINVAL; /* This should never happen. */ } - vma->vm_flags |= VM_RESERVED; /* Don't swap */ - vma->vm_flags |= VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; drm_vm_open_locked(dev, vma); return 0; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index fcdbe46914f7..d2545560664f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -500,7 +500,7 @@ static int exynos_drm_gem_mmap_buffer(struct file *filp, DRM_DEBUG_KMS("%s\n", __FILE__); - vma->vm_flags |= (VM_IO | VM_RESERVED); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; update_vm_cache_attr(exynos_gem_obj, vma); diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 884ba73ac6ce..afded54dbb10 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -178,8 +178,7 @@ static int psbfb_mmap(struct fb_info *info, struct vm_area_struct *vma) */ vma->vm_ops = &psbfb_vm_ops; vma->vm_private_data = (void *)psbfb; - vma->vm_flags |= VM_RESERVED | VM_IO | - VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP; return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index a877813571a4..3ba72dbdc4bd 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -285,7 +285,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, */ vma->vm_private_data = bo; - vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP; return 0; out_unref: ttm_bo_unref(&bo); @@ -300,7 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) vma->vm_ops = &ttm_bo_vm_ops; vma->vm_private_data = ttm_bo_reference(bo); - vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; return 0; } EXPORT_SYMBOL(ttm_fbdev_mmap); diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 67df842fbb33..69a2b16f42a6 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -243,7 +243,7 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 45ee89b65c23..1a1d5d99fcf9 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -117,7 +117,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, physical = galpas->user.fw_handle; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); - /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, vma->vm_page_prot); if (unlikely(ret)) { @@ -139,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, u64 start, ofs; struct page *page; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; start = vma->vm_start; for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 736d9edbdbe7..3eb7e454849b 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1225,7 +1225,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &ipath_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index faa44cb08071..959a5c4ff812 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -971,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &qib_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index 7bc775219f97..e5a76da86081 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -1647,7 +1647,7 @@ static int meye_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &meye_vm_ops; vma->vm_flags &= ~VM_IO; /* not I/O memory */ - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = (void *) (offset / gbufsize); meye_vm_open(vma); diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 66ac21d466af..134016f0e660 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -911,7 +911,7 @@ static int omap_vout_mmap(struct file *file, struct vm_area_struct *vma) q->bufs[i]->baddr = vma->vm_start; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); vma->vm_ops = &omap_vout_vm_ops; vma->vm_private_data = (void *) vout; diff --git a/drivers/media/platform/vino.c b/drivers/media/platform/vino.c index 790d96cffeea..70b0bf4b2900 100644 --- a/drivers/media/platform/vino.c +++ b/drivers/media/platform/vino.c @@ -3950,7 +3950,7 @@ found: fb->map_count = 1; - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_flags &= ~VM_IO; vma->vm_private_data = fb; vma->vm_file = file; diff --git a/drivers/media/usb/sn9c102/sn9c102_core.c b/drivers/media/usb/sn9c102/sn9c102_core.c index 19ea780b16ff..5bfc8e2f018f 100644 --- a/drivers/media/usb/sn9c102/sn9c102_core.c +++ b/drivers/media/usb/sn9c102/sn9c102_core.c @@ -2126,8 +2126,7 @@ static int sn9c102_mmap(struct file* filp, struct vm_area_struct *vma) return -EINVAL; } - vma->vm_flags |= VM_IO; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; pos = cam->frame[i].bufmem; while (size > 0) { /* size is page-aligned */ diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index f67018ed3795..5c36a57e6590 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -1108,8 +1108,7 @@ static int usbvision_mmap(struct file *file, struct vm_area_struct *vma) } /* VM_IO is eventually going to replace PageReserved altogether */ - vma->vm_flags |= VM_IO; - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; pos = usbvision->frame[i].data; while (size > 0) { diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f300deafd268..828e7c10bd70 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -582,7 +582,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q, map->count = 1; map->q = q; vma->vm_ops = &videobuf_vm_ops; - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */ vma->vm_private_data = map; dprintk(1, "mmap %p: q=%p %08lx-%08lx pgoff %08lx bufs %d-%d\n", diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c index df142580e44c..2ff7fcc77b11 100644 --- a/drivers/media/v4l2-core/videobuf-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf-vmalloc.c @@ -270,7 +270,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q, } vma->vm_ops = &videobuf_vm_ops; - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = map; dprintk(1, "mmap %p: q=%p %08lx-%08lx (%lx) pgoff %08lx buf %d\n", diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 504cd4cbe29e..051ea3571b20 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -163,7 +163,7 @@ int vb2_mmap_pfn_range(struct vm_area_struct *vma, unsigned long paddr, return ret; } - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = priv; vma->vm_ops = vm_ops; diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c index 0c43297ed9ac..8835eabb3b87 100644 --- a/drivers/misc/carma/carma-fpga.c +++ b/drivers/misc/carma/carma-fpga.c @@ -1243,8 +1243,6 @@ static int data_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } - /* IO memory (stop cacheing) */ - vma->vm_flags |= VM_IO | VM_RESERVED; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return io_remap_pfn_range(vma, vma->vm_start, addr, vsize, diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index ecafa4ba238b..492c8cac69ac 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -108,9 +108,8 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) return -EINVAL; - vma->vm_flags |= - (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | - VM_RESERVED); + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED | + VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = PAGE_SHARED; vma->vm_ops = &gru_vm_ops; diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index a6e74514e662..73ae81a629f2 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1182,7 +1182,7 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; if (set_vm_offset(vma, off) < 0) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; #ifdef pgprot_noncached if (file->f_flags & O_DSYNC || off >= __pa(high_memory)) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9c5c5f2b3962..be2c9a6561ff 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1257,7 +1257,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) } sfp->mmap_called = 1; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = sfp; vma->vm_ops = &sg_mmap_vm_ops; return 0; diff --git a/drivers/staging/omapdrm/omap_gem_dmabuf.c b/drivers/staging/omapdrm/omap_gem_dmabuf.c index 42728e0cc194..c6f3ef6f57b9 100644 --- a/drivers/staging/omapdrm/omap_gem_dmabuf.c +++ b/drivers/staging/omapdrm/omap_gem_dmabuf.c @@ -160,7 +160,7 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, goto out_unlock; } - vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = obj; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c index bddea1d3b2c3..701a11ac676d 100644 --- a/drivers/staging/tidspbridge/rmgr/drv_interface.c +++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c @@ -261,7 +261,7 @@ static int bridge_mmap(struct file *filp, struct vm_area_struct *vma) { u32 status; - vma->vm_flags |= VM_RESERVED | VM_IO; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); dev_dbg(bridge, "%s: vm filp %p start %lx end %lx page_prot %ulx " diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index a783d533a1a6..5110f367f1f1 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -653,8 +653,6 @@ static int uio_mmap_physical(struct vm_area_struct *vma) if (mi < 0) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return remap_pfn_range(vma, @@ -666,7 +664,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma) static int uio_mmap_logical(struct vm_area_struct *vma) { - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &uio_vm_ops; uio_vma_open(vma); return 0; diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 91cd85076a44..9a62e89d6dc0 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1247,7 +1247,7 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma) { /* don't do anything here: "fault" will set up page table entries */ vma->vm_ops = &mon_bin_vm_ops; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = filp->private_data; mon_bin_vma_open(vma); return 0; diff --git a/drivers/video/68328fb.c b/drivers/video/68328fb.c index a425d65d5ba2..fa44fbed397d 100644 --- a/drivers/video/68328fb.c +++ b/drivers/video/68328fb.c @@ -400,7 +400,7 @@ static int mc68x328fb_mmap(struct fb_info *info, struct vm_area_struct *vma) #ifndef MMU /* this is uClinux (no MMU) specific code */ - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_start = videomemory; return 0; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 3f2e8c13f1ca..868932f904ef 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -1942,8 +1942,7 @@ static int atyfb_mmap(struct fb_info *info, struct vm_area_struct *vma) off = vma->vm_pgoff << PAGE_SHIFT; size = vma->vm_end - vma->vm_start; - /* To stop the swapper from even considering these pages. */ - vma->vm_flags |= (VM_IO | VM_RESERVED); + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ if (((vma->vm_pgoff == 0) && (size == info->fix.smem_len)) || ((off == info->fix.smem_len) && (size == PAGE_SIZE))) diff --git a/drivers/video/fb-puv3.c b/drivers/video/fb-puv3.c index 60a787fa32cf..7d106f1f4906 100644 --- a/drivers/video/fb-puv3.c +++ b/drivers/video/fb-puv3.c @@ -653,9 +653,8 @@ int unifb_mmap(struct fb_info *info, vma->vm_page_prot)) return -EAGAIN; - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ return 0; - } static struct fb_ops unifb_ops = { diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 64cda560c488..88cad6b8b479 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -166,7 +166,7 @@ static const struct address_space_operations fb_deferred_io_aops = { static int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma) { vma->vm_ops = &fb_deferred_io_vm_ops; - vma->vm_flags |= ( VM_RESERVED | VM_DONTEXPAND ); + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; if (!(info->flags & FBINFO_VIRTFB)) vma->vm_flags |= VM_IO; vma->vm_private_data = info; diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 0dff12a1daef..3ff0105a496a 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1410,8 +1410,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) return -EINVAL; off += start; vma->vm_pgoff = off >> PAGE_SHIFT; - /* This is an IO map - tell maydump to skip this VMA */ - vma->vm_flags |= VM_IO | VM_RESERVED; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by io_remap_pfn_range()*/ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); fb_pgprotect(file, vma, off); if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 7e7b7a9ba274..05e2a8a99d8f 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -1024,7 +1024,7 @@ static int gbefb_mmap(struct fb_info *info, pgprot_val(vma->vm_page_prot) = pgprot_fb(pgprot_val(vma->vm_page_prot)); - vma->vm_flags |= VM_IO | VM_RESERVED; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ /* look for the starting tile */ tile = &gbe_tiles.cpu[offset >> TILE_SHIFT]; diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c index 3c39aa8de928..15373f4aee19 100644 --- a/drivers/video/omap2/omapfb/omapfb-main.c +++ b/drivers/video/omap2/omapfb/omapfb-main.c @@ -1128,7 +1128,7 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma) DBG("user mmap region start %lx, len %d, off %lx\n", start, len, off); vma->vm_pgoff = off >> PAGE_SHIFT; - vma->vm_flags |= VM_IO | VM_RESERVED; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); vma->vm_ops = &mmap_user_ops; vma->vm_private_data = rg; diff --git a/drivers/video/sbuslib.c b/drivers/video/sbuslib.c index 3c1de981a18c..296afae442f4 100644 --- a/drivers/video/sbuslib.c +++ b/drivers/video/sbuslib.c @@ -57,9 +57,8 @@ int sbusfb_mmap_helper(struct sbus_mmap_map *map, off = vma->vm_pgoff << PAGE_SHIFT; - /* To stop the swapper from even considering these pages */ - vma->vm_flags |= (VM_IO | VM_RESERVED); - + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); /* Each page, see which map applies */ diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c index 5533a32c6ca1..97bd6620c364 100644 --- a/drivers/video/smscufx.c +++ b/drivers/video/smscufx.c @@ -803,7 +803,6 @@ static int ufx_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ return 0; } diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c index 8af64148294b..f45eba3d6150 100644 --- a/drivers/video/udlfb.c +++ b/drivers/video/udlfb.c @@ -345,7 +345,6 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ return 0; } diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c index 970e43d13f52..89aef343e295 100644 --- a/drivers/video/vermilion/vermilion.c +++ b/drivers/video/vermilion/vermilion.c @@ -1018,7 +1018,6 @@ static int vmlfb_mmap(struct fb_info *info, struct vm_area_struct *vma) offset += vinfo->vram_start; pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT; - vma->vm_flags |= VM_RESERVED | VM_IO; if (remap_pfn_range(vma, vma->vm_start, offset >> PAGE_SHIFT, size, vma->vm_page_prot)) return -EAGAIN; diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c index 501a922aa9dc..c7f692525b88 100644 --- a/drivers/video/vfb.c +++ b/drivers/video/vfb.c @@ -439,7 +439,6 @@ static int vfb_mmap(struct fb_info *info, size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ return 0; } diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 934985d14c24..4097987b330e 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -535,7 +535,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_private_data = vm_priv; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &gntalloc_vmops; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 5df9fd847b2e..610bfc6be177 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -720,7 +720,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ef6389580b8c..8adb9cc267f9 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -455,7 +455,8 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) { /* DONTCOPY is essential for Xen because copy_page_range doesn't know * how to recreate these mappings */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY | + VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &privcmd_vm_ops; vma->vm_private_data = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2b72d26e2e4b..e800dec958c3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1135,7 +1135,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, } /* Do not dump I/O mapped devices or special mappings */ - if (vma->vm_flags & (VM_IO | VM_RESERVED)) + if (vma->vm_flags & VM_IO) return 0; /* By default, dump shared memory if mapped from an anonymous file. */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 08d812b32282..262db114ff01 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1205,7 +1205,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) int dump_ok; /* Do not dump I/O mapped devices or special mappings */ - if (vma->vm_flags & (VM_IO | VM_RESERVED)) { + if (vma->vm_flags & VM_IO) { kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); return 0; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9460120a5170..0a0ab8e21b19 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_RESERVED; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &hugetlb_vm_ops; if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4540b8f76f16..79827ce03e3b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "VmPTE:\t%8lu kB\n" "VmSwap:\t%8lu kB\n", hiwater_vm << (PAGE_SHIFT-10), - (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), + total_vm << (PAGE_SHIFT-10), mm->locked_vm << (PAGE_SHIFT-10), mm->pinned_vm << (PAGE_SHIFT-10), hiwater_rss << (PAGE_SHIFT-10), diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 95b738c7abff..ba7a0ff19d39 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -239,7 +239,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) { - if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED)) + if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP)) return 0; /* * Migration allocates pages in the highest zone. If we cannot diff --git a/include/linux/mm.h b/include/linux/mm.h index dc08d558e058..0514fe9d3c84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -96,7 +96,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ @@ -148,7 +147,7 @@ extern unsigned int kobjsize(const void *objp); * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) /* * mapping from the currently active vm_flags protection bits (the diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 58d3173eb365..a57a43f5ca7c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -349,7 +349,6 @@ struct mm_struct { unsigned long shared_vm; /* Shared pages (files) */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ unsigned long stack_vm; /* VM_GROWSUP/DOWN */ - unsigned long reserved_vm; /* VM_RESERVED|VM_IO pages */ unsigned long def_flags; unsigned long nr_ptes; /* Page table pages */ unsigned long start_code, end_code, start_data, end_data; diff --git a/kernel/events/core.c b/kernel/events/core.c index f16f3c58f11a..cda3ebd49e86 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3671,7 +3671,7 @@ unlock: atomic_inc(&event->mmap_count); mutex_unlock(&event->mmap_mutex); - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &perf_mmap_vmops; return ret; diff --git a/mm/ksm.c b/mm/ksm.c index f9ccb16559ee..9638620a7530 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1469,8 +1469,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, */ if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | - VM_NONLINEAR | VM_MIXEDMAP)) + VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP)) return 0; /* just ignore the advice */ #ifdef VM_SAO diff --git a/mm/memory.c b/mm/memory.c index 7b1e4feaec06..e09c04813186 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2297,14 +2297,13 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * rest of the world about it: * VM_IO tells people not to look at these pages * (accesses can have side effects). - * VM_RESERVED is specified all over the place, because - * in 2.4 it kept swapout's vma scan off this vma; but - * in 2.6 the LRU scan won't even find its pages, so this - * flag means no more than count its pages in reserved_vm, - * and omit it from core dump, even when VM_IO turned off. * VM_PFNMAP tells the core MM that the base pages are just * raw PFN mappings, and do not have a "struct page" associated * with them. + * VM_DONTEXPAND + * Disable vma merging and expanding with mremap(). + * VM_DONTDUMP + * Omit vma from core dump, even when VM_IO turned off. * * There's a horrible special case to handle copy-on-write * behaviour that some programs depend on. We mark the "original" @@ -2321,7 +2320,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, if (err) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; diff --git a/mm/mlock.c b/mm/mlock.c index ef726e8aa8e9..a948be4b7ba7 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -227,7 +227,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (vma->vm_flags & (VM_IO | VM_PFNMAP)) goto no_mlock; - if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || + if (!((vma->vm_flags & VM_DONTEXPAND) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))) { diff --git a/mm/mmap.c b/mm/mmap.c index c1ad2e78ea58..a76042dc806d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -945,8 +945,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags, mm->exec_vm += pages; } else if (flags & stack_flags) mm->stack_vm += pages; - if (flags & (VM_RESERVED|VM_IO)) - mm->reserved_vm += pages; } #endif /* CONFIG_PROC_FS */ diff --git a/mm/nommu.c b/mm/nommu.c index 9c4a7b63a4df..12e84e69dd06 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1811,7 +1811,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, if (addr != (pfn << PAGE_SHIFT)) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; return 0; } EXPORT_SYMBOL(remap_pfn_range); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2bb90b1d241c..8de704679bfc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2163,8 +2163,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, usize -= PAGE_SIZE; } while (usize > 0); - /* Prevent "things" like memory migration? VM_flags need a cleanup... */ - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 55af8c5b57e6..3a6e8731646c 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -485,7 +485,7 @@ static int sel_mmap_policy(struct file *filp, struct vm_area_struct *vma) return -EACCES; } - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &sel_mmap_policy_ops; return 0; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 20554eff5a21..5e12e5bacbba 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3039,7 +3039,7 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_status; area->vm_private_data = substream; - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } @@ -3076,7 +3076,7 @@ static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_control; area->vm_private_data = substream; - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } #else /* ! coherent mmap */ @@ -3170,7 +3170,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = { int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *area) { - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; #ifdef ARCH_HAS_DMA_MMAP_COHERENT if (!substream->ops->page && substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV) diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index c4fd3b1d9592..d0323a693ba2 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c @@ -262,7 +262,7 @@ static int usb_stream_hwdep_mmap(struct snd_hwdep *hw, } area->vm_ops = &usb_stream_hwdep_vm_ops; - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; area->vm_private_data = us122l; atomic_inc(&us122l->mmap_count); out: diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index 04aafb43a13c..0b34dbc8f302 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -82,7 +82,7 @@ static int snd_us428ctls_mmap(struct snd_hwdep * hw, struct file *filp, struct v us428->us428ctls_sharedmem->CtlSnapShotLast = -2; } area->vm_ops = &us428ctls_vm_ops; - area->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; area->vm_private_data = hw->private_data; return 0; } diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index 8e40b6e67e9e..cc56007791e0 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -723,7 +723,7 @@ static int snd_usX2Y_hwdep_pcm_mmap(struct snd_hwdep * hw, struct file *filp, st return -ENODEV; } area->vm_ops = &snd_usX2Y_hwdep_pcm_vm_ops; - area->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; area->vm_private_data = hw->private_data; return 0; } -- cgit v1.2.3 From 01dc52ebdf472f77cca623ca693ca24cfc0f1bbe Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Oct 2012 16:29:30 -0700 Subject: oom: remove deprecated oom_adj The deprecated /proc//oom_adj is scheduled for removal this month. Signed-off-by: Davidlohr Bueso Acked-by: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/obsolete/proc-pid-oom_adj | 22 ------ Documentation/filesystems/proc.txt | 22 +----- fs/proc/base.c | 117 +--------------------------- include/linux/oom.h | 11 --- include/linux/sched.h | 1 - kernel/fork.c | 1 - mm/oom_kill.c | 4 +- 7 files changed, 7 insertions(+), 171 deletions(-) delete mode 100644 Documentation/ABI/obsolete/proc-pid-oom_adj (limited to 'fs') diff --git a/Documentation/ABI/obsolete/proc-pid-oom_adj b/Documentation/ABI/obsolete/proc-pid-oom_adj deleted file mode 100644 index 9a3cb88ade47..000000000000 --- a/Documentation/ABI/obsolete/proc-pid-oom_adj +++ /dev/null @@ -1,22 +0,0 @@ -What: /proc//oom_adj -When: August 2012 -Why: /proc//oom_adj allows userspace to influence the oom killer's - badness heuristic used to determine which task to kill when the kernel - is out of memory. - - The badness heuristic has since been rewritten since the introduction of - this tunable such that its meaning is deprecated. The value was - implemented as a bitshift on a score generated by the badness() - function that did not have any precise units of measure. With the - rewrite, the score is given as a proportion of available memory to the - task allocating pages, so using a bitshift which grows the score - exponentially is, thus, impossible to tune with fine granularity. - - A much more powerful interface, /proc//oom_score_adj, was - introduced with the oom killer rewrite that allows users to increase or - decrease the badness score linearly. This interface will replace - /proc//oom_adj. - - A warning will be emitted to the kernel log if an application uses this - deprecated interface. After it is printed once, future warnings will be - suppressed until the kernel is rebooted. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fb0a6aeb936c..a1793d670cd0 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -33,7 +33,7 @@ Table of Contents 2 Modifying System Parameters 3 Per-Process Parameters - 3.1 /proc//oom_adj & /proc//oom_score_adj - Adjust the oom-killer + 3.1 /proc//oom_score_adj - Adjust the oom-killer score 3.2 /proc//oom_score - Display current oom-killer score 3.3 /proc//io - Display the IO accounting fields @@ -1320,10 +1320,10 @@ of the kernel. CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------ -3.1 /proc//oom_adj & /proc//oom_score_adj- Adjust the oom-killer score +3.1 /proc//oom_score_adj- Adjust the oom-killer score -------------------------------------------------------------------------------- -These file can be used to adjust the badness heuristic used to select which +This file can be used to adjust the badness heuristic used to select which process gets killed in out of memory conditions. The badness heuristic assigns a value to each candidate task ranging from 0 @@ -1361,22 +1361,10 @@ same system, cpuset, mempolicy, or memory controller resources to use at least equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. -For backwards compatibility with previous kernels, /proc//oom_adj may also -be used to tune the badness score. Its acceptable values range from -16 -(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 -(OOM_DISABLE) to disable oom killing entirely for that task. Its value is -scaled linearly with /proc//oom_score_adj. - -Writing to /proc//oom_score_adj or /proc//oom_adj will change the -other with its scaled value. - The value of /proc//oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. -NOTICE: /proc//oom_adj is deprecated and will be removed, please see -Documentation/feature-removal-schedule.txt. - Caveat: when a parent task is selected, the oom killer will sacrifice any first generation children with separate address spaces instead, if possible. This avoids servers and important system daemons from being killed and loses the @@ -1387,9 +1375,7 @@ minimal amount of work. ------------------------------------------------------------- This file can be used to check the current score used by the oom-killer is for -any given . Use it together with /proc//oom_adj to tune which -process should be killed in an out-of-memory situation. - +any given . 3.3 /proc//io - Display the IO accounting fields ------------------------------------------------------- diff --git a/fs/proc/base.c b/fs/proc/base.c index d295af993677..ef5c84be66f9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -873,111 +873,6 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; -static ssize_t oom_adjust_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); - char buffer[PROC_NUMBUF]; - size_t len; - int oom_adjust = OOM_DISABLE; - unsigned long flags; - - if (!task) - return -ESRCH; - - if (lock_task_sighand(task, &flags)) { - oom_adjust = task->signal->oom_adj; - unlock_task_sighand(task, &flags); - } - - put_task_struct(task); - - len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); - - return simple_read_from_buffer(buf, count, ppos, buffer, len); -} - -static ssize_t oom_adjust_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF]; - int oom_adjust; - unsigned long flags; - int err; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) { - err = -EFAULT; - goto out; - } - - err = kstrtoint(strstrip(buffer), 0, &oom_adjust); - if (err) - goto out; - if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && - oom_adjust != OOM_DISABLE) { - err = -EINVAL; - goto out; - } - - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) { - err = -ESRCH; - goto out; - } - - task_lock(task); - if (!task->mm) { - err = -EINVAL; - goto err_task_lock; - } - - if (!lock_task_sighand(task, &flags)) { - err = -ESRCH; - goto err_task_lock; - } - - if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { - err = -EACCES; - goto err_sighand; - } - - /* - * Warn that /proc/pid/oom_adj is deprecated, see - * Documentation/feature-removal-schedule.txt. - */ - printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", - current->comm, task_pid_nr(current), task_pid_nr(task), - task_pid_nr(task)); - task->signal->oom_adj = oom_adjust; - /* - * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum - * value is always attainable. - */ - if (task->signal->oom_adj == OOM_ADJUST_MAX) - task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; - else - task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / - -OOM_DISABLE; - trace_oom_score_adj_update(task); -err_sighand: - unlock_task_sighand(task, &flags); -err_task_lock: - task_unlock(task); - put_task_struct(task); -out: - return err < 0 ? err : count; -} - -static const struct file_operations proc_oom_adjust_operations = { - .read = oom_adjust_read, - .write = oom_adjust_write, - .llseek = generic_file_llseek, -}; - static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -1051,15 +946,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = oom_score_adj; trace_oom_score_adj_update(task); - /* - * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is - * always attainable. - */ - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - task->signal->oom_adj = OOM_DISABLE; - else - task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / - OOM_SCORE_ADJ_MAX; + err_sighand: unlock_task_sighand(task, &flags); err_task_lock: @@ -2710,7 +2597,6 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -3077,7 +2963,6 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), diff --git a/include/linux/oom.h b/include/linux/oom.h index 49a3031fda50..d36a8221f58b 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -1,17 +1,6 @@ #ifndef __INCLUDE_LINUX_OOM_H #define __INCLUDE_LINUX_OOM_H -/* - * /proc//oom_adj is deprecated, see - * Documentation/feature-removal-schedule.txt. - * - * /proc//oom_adj set to -17 protects from the oom-killer - */ -#define OOM_DISABLE (-17) -/* inclusive */ -#define OOM_ADJUST_MIN (-16) -#define OOM_ADJUST_MAX 15 - /* * /proc//oom_score_adj set to OOM_SCORE_ADJ_MIN disables oom killing for * pid. diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c5612f0374b..c2070e92a9d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -671,7 +671,6 @@ struct signal_struct { struct rw_semaphore group_rwsem; #endif - int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ int oom_score_adj_min; /* OOM kill score adjustment minimum value. * Only settable by CAP_SYS_RESOURCE. */ diff --git a/kernel/fork.c b/kernel/fork.c index ec667f797af3..972762e01024 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1056,7 +1056,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_rwsem(&sig->group_rwsem); #endif - sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 198600861638..79e0f3e24831 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -428,8 +428,8 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, { task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " - "oom_adj=%d, oom_score_adj=%d\n", - current->comm, gfp_mask, order, current->signal->oom_adj, + "oom_score_adj=%d\n", + current->comm, gfp_mask, order, current->signal->oom_score_adj); cpuset_print_task_mems_allowed(current); task_unlock(current); -- cgit v1.2.3 From 4c199a93a2d36b277a9fd209a0f2793f8460a215 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:30:32 -0700 Subject: rbtree: empty nodes have no color Empty nodes have no color. We can make use of this property to simplify the code emitted by the RB_EMPTY_NODE and RB_CLEAR_NODE macros. Also, we can get rid of the rb_init_node function which had been introduced by commit 88d19cf37952 ("timers: Add rb_init_node() to allow for stack allocated rb nodes") to avoid some issue with the empty node's color not being initialized. I'm not sure what the RB_EMPTY_NODE checks in rb_prev() / rb_next() are doing there, though. axboe introduced them in commit 10fd48f2376d ("rbtree: fixed reversed RB_EMPTY_NODE and rb_next/prev"). The way I see it, the 'empty node' abstraction is only used by rbtree users to flag nodes that they haven't inserted in any rbtree, so asking the predecessor or successor of such nodes doesn't make any sense. One final rb_init_node() caller was recently added in sysctl code to implement faster sysctl name lookups. This code doesn't make use of RB_EMPTY_NODE at all, and from what I could see it only called rb_init_node() under the mistaken assumption that such initialization was required before node insertion. [sfr@canb.auug.org.au: fix net/ceph/osd_client.c build] Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Acked-by: David Woodhouse Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Jens Axboe Cc: "Eric W. Biederman" Cc: John Stultz Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 4 +--- include/linux/rbtree.h | 15 +++++---------- include/linux/timerqueue.h | 2 +- lib/rbtree.c | 4 ++-- net/ceph/osd_client.c | 1 - 5 files changed, 9 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index dcd56f84db7e..fddc50729632 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -168,10 +168,8 @@ static void init_header(struct ctl_table_header *head, head->node = node; if (node) { struct ctl_table *entry; - for (entry = table; entry->procname; entry++, node++) { - rb_init_node(&node->node); + for (entry = table; entry->procname; entry++, node++) node->header = head; - } } } diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index e6a807720ded..2049087c43b7 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -67,17 +67,12 @@ static inline void rb_set_color(struct rb_node *rb, int color) #define RB_ROOT (struct rb_root) { NULL, } #define rb_entry(ptr, type, member) container_of(ptr, type, member) -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -#define RB_EMPTY_NODE(node) (rb_parent(node) == node) -#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbree */ +#define RB_EMPTY_NODE(node) ((node)->rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) ((node)->rb_parent_color = (unsigned long)(node)) -static inline void rb_init_node(struct rb_node *rb) -{ - rb->rb_parent_color = 0; - rb->rb_right = NULL; - rb->rb_left = NULL; - RB_CLEAR_NODE(rb); -} extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index 5088727478fd..a520fd70a59f 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -39,7 +39,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) static inline void timerqueue_init(struct timerqueue_node *node) { - rb_init_node(&node->node); + RB_CLEAR_NODE(&node->node); } static inline void timerqueue_init_head(struct timerqueue_head *head) diff --git a/lib/rbtree.c b/lib/rbtree.c index d4175565dc2c..fe43c8c5f527 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -387,7 +387,7 @@ struct rb_node *rb_next(const struct rb_node *node) { struct rb_node *parent; - if (rb_parent(node) == node) + if (RB_EMPTY_NODE(node)) return NULL; /* If we have a right-hand child, go down and then left as far @@ -416,7 +416,7 @@ struct rb_node *rb_prev(const struct rb_node *node) { struct rb_node *parent; - if (rb_parent(node) == node) + if (RB_EMPTY_NODE(node)) return NULL; /* If we have a left-hand child, go down and then right as far diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ccbdfbba9e53..c1d756cc7448 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -221,7 +221,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); - rb_init_node(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); -- cgit v1.2.3 From ea5272f5c94fb2ee62f4f15a5b88eef6184cd506 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:30:35 -0700 Subject: rbtree: fix incorrect rbtree node insertion in fs/proc/proc_sysctl.c The recently added code to use rbtrees in sysctl did not follow the proper rbtree interface on insertion - it was calling rb_link_node() which inserts a new node into the binary tree, but missed the call to rb_insert_color() which properly balances the rbtree and establishes all expected rbtree invariants. I found out about this only because faulty commit also used rb_init_node(), which I am removing within this patchset. But I think it's an easy mistake to make, and it makes me wonder if we should change the rbtree API so that insertions would be done with a single rb_insert() call (even if its implementation could still inline the rb_link_node() part and call a private __rb_insert_color function to do the rebalancing). Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Acked-by: David Woodhouse Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Jens Axboe Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fddc50729632..a781bdf06694 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -142,6 +142,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) } rb_link_node(node, parent, p); + rb_insert_color(node, &head->parent->root); return 0; } -- cgit v1.2.3 From bf7ad8eeab995710c766df49c9c69a8592ca0216 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:30:37 -0700 Subject: rbtree: move some implementation details from rbtree.h to rbtree.c rbtree users must use the documented APIs to manipulate the tree structure. Low-level helpers to manipulate node colors and parenthood are not part of that API, so move them to lib/rbtree.c [dwmw2@infradead.org: fix jffs2 build issue due to renamed __rb_parent_color field] Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Acked-by: David Woodhouse Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Jens Axboe Cc: "Eric W. Biederman" Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jffs2/readinode.c | 13 ++++++++----- include/linux/rbtree.h | 34 +++++++++------------------------- lib/rbtree.c | 20 +++++++++++++++++++- 3 files changed, 36 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 1ea349fff68b..ae81b01e6fd7 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -394,8 +394,11 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, } /* Trivial function to remove the last node in the tree. Which by definition - has no right-hand -- so can be removed just by making its only child (if - any) take its place under its parent. */ + has no right-hand child — so can be removed just by making its left-hand + child (if any) take its place under its parent. Since this is only done + when we're consuming the whole tree, there's no need to use rb_erase() + and let it worry about adjusting colours and balancing the tree. That + would just be a waste of time. */ static void eat_last(struct rb_root *root, struct rb_node *node) { struct rb_node *parent = rb_parent(node); @@ -412,12 +415,12 @@ static void eat_last(struct rb_root *root, struct rb_node *node) link = &parent->rb_right; *link = node->rb_left; - /* Colour doesn't matter now. Only the parent pointer. */ if (node->rb_left) - node->rb_left->rb_parent_color = node->rb_parent_color; + node->rb_left->__rb_parent_color = node->__rb_parent_color; } -/* We put this in reverse order, so we can just use eat_last */ +/* We put the version tree in reverse order, so we can use the same eat_last() + function that we use to consume the tmpnode tree (tn_root). */ static void ver_insert(struct rb_root *ver_root, struct jffs2_tmp_dnode_info *tn) { struct rb_node **link = &ver_root->rb_node; diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 2049087c43b7..bf836a2c6a32 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -32,37 +32,19 @@ #include #include -struct rb_node -{ - unsigned long rb_parent_color; -#define RB_RED 0 -#define RB_BLACK 1 +struct rb_node { + unsigned long __rb_parent_color; struct rb_node *rb_right; struct rb_node *rb_left; } __attribute__((aligned(sizeof(long)))); /* The alignment might seem pointless, but allegedly CRIS needs it */ -struct rb_root -{ +struct rb_root { struct rb_node *rb_node; }; -#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) -#define rb_color(r) ((r)->rb_parent_color & 1) -#define rb_is_red(r) (!rb_color(r)) -#define rb_is_black(r) rb_color(r) -#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) -#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) - -static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) -{ - rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; -} -static inline void rb_set_color(struct rb_node *rb, int color) -{ - rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; -} +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define RB_ROOT (struct rb_root) { NULL, } #define rb_entry(ptr, type, member) container_of(ptr, type, member) @@ -70,8 +52,10 @@ static inline void rb_set_color(struct rb_node *rb, int color) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) /* 'empty' nodes are nodes that are known not to be inserted in an rbree */ -#define RB_EMPTY_NODE(node) ((node)->rb_parent_color == (unsigned long)(node)) -#define RB_CLEAR_NODE(node) ((node)->rb_parent_color = (unsigned long)(node)) +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) extern void rb_insert_color(struct rb_node *, struct rb_root *); @@ -98,7 +82,7 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, struct rb_node ** rb_link) { - node->rb_parent_color = (unsigned long )parent; + node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = NULL; *rb_link = node; diff --git a/lib/rbtree.c b/lib/rbtree.c index fe43c8c5f527..ccada9abe6f5 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -23,6 +23,24 @@ #include #include +#define RB_RED 0 +#define RB_BLACK 1 + +#define rb_color(r) ((r)->__rb_parent_color & 1) +#define rb_is_red(r) (!rb_color(r)) +#define rb_is_black(r) rb_color(r) +#define rb_set_red(r) do { (r)->__rb_parent_color &= ~1; } while (0) +#define rb_set_black(r) do { (r)->__rb_parent_color |= 1; } while (0) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} +static inline void rb_set_color(struct rb_node *rb, int color) +{ + rb->__rb_parent_color = (rb->__rb_parent_color & ~1) | color; +} + static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) { struct rb_node *right = node->rb_right; @@ -255,7 +273,7 @@ void rb_erase(struct rb_node *node, struct rb_root *root) rb_set_parent(old->rb_right, node); } - node->rb_parent_color = old->rb_parent_color; + node->__rb_parent_color = old->__rb_parent_color; node->rb_left = old->rb_left; rb_set_parent(old->rb_left, node); -- cgit v1.2.3 From 6b2dbba8b6ac4df26f72eda1e5ea7bab9f950e08 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:31:25 -0700 Subject: mm: replace vma prio_tree with an interval tree Implement an interval tree as a replacement for the VMA prio_tree. The algorithms are similar to lib/interval_tree.c; however that code can't be directly reused as the interval endpoints are not explicitly stored in the VMA. So instead, the common algorithm is moved into a template and the details (node type, how to get interval endpoints from the node, etc) are filled in using the C preprocessor. Once the interval tree functions are available, using them as a replacement to the VMA prio tree is a relatively simple, mechanical job. Signed-off-by: Michel Lespinasse Cc: Rik van Riel Cc: Hillf Danton Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Andrea Arcangeli Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/fault-armv.c | 3 +- arch/arm/mm/flush.c | 3 +- arch/parisc/kernel/cache.c | 3 +- arch/x86/mm/hugetlbpage.c | 3 +- fs/hugetlbfs/inode.c | 9 +- fs/inode.c | 2 +- include/linux/fs.h | 6 +- include/linux/interval_tree_tmpl.h | 215 +++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 30 +++--- include/linux/mm_types.h | 14 +-- kernel/events/uprobes.c | 3 +- kernel/fork.c | 2 +- lib/interval_tree.c | 166 ++-------------------------- lib/prio_tree.c | 19 +--- mm/Makefile | 4 +- mm/filemap_xip.c | 3 +- mm/fremap.c | 2 +- mm/hugetlb.c | 3 +- mm/interval_tree.c | 61 +++++++++++ mm/memory-failure.c | 3 +- mm/memory.c | 9 +- mm/mmap.c | 22 ++-- mm/nommu.c | 12 +-- mm/prio_tree.c | 208 ----------------------------------- mm/rmap.c | 18 ++-- 25 files changed, 357 insertions(+), 466 deletions(-) create mode 100644 include/linux/interval_tree_tmpl.h create mode 100644 mm/interval_tree.c delete mode 100644 mm/prio_tree.c (limited to 'fs') diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 7599e2625c7d..2a5907b5c8d2 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; unsigned long offset; pgoff_t pgoff; int aliases = 0; @@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, * cache coherency. */ flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { /* * If this VMA is not in our MM, we can ignore it. * Note that we intentionally mask out the VMA diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 40ca11ed6e5f..1c8f7f564175 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p { struct mm_struct *mm = current->active_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; pgoff_t pgoff; /* @@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { unsigned long offset; /* diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 9d181890a7e3..48e16dc20102 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page) { struct address_space *mapping = page_mapping(page); struct vm_area_struct *mpnt; - struct prio_tree_iter iter; unsigned long offset; unsigned long addr, old_addr = 0; pgoff_t pgoff; @@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page) * to flush one address here for them all to become coherent */ flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; addr = mpnt->vm_start + offset; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index b91e48512425..937bff5cdaa7 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) struct address_space *mapping = vma->vm_file->f_mapping; pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - struct prio_tree_iter iter; struct vm_area_struct *svma; unsigned long saddr; pte_t *spte = NULL; @@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return (pte_t *)pmd_alloc(mm, pud, addr); mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0a0ab8e21b19..c5bc355d8243 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode) } static inline void -hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) +hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff) { struct vm_area_struct *vma; - struct prio_tree_iter iter; - vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { + vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) { unsigned long v_offset; /* * Can the expression below overflow on 32-bit arches? - * No, because the prio_tree returns us only those vmas + * No, because the interval tree returns us only those vmas * which overlap the truncated area starting at pgoff, * and no vma on a 32-bit arch can span beyond the 4GB. */ @@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) i_size_write(inode, offset); mutex_lock(&mapping->i_mmap_mutex); - if (!prio_tree_empty(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); mutex_unlock(&mapping->i_mmap_mutex); truncate_hugepages(inode, offset); diff --git a/fs/inode.c b/fs/inode.c index ac8d904b3f16..b03c71957246 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping) mutex_init(&mapping->i_mmap_mutex); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); - INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); + mapping->i_mmap = RB_ROOT; INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); } EXPORT_SYMBOL(address_space_init_once); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a8a273d5b2f..c617ed024df8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -401,7 +401,7 @@ struct inodes_stat_t { #include #include #include -#include +#include #include #include #include @@ -669,7 +669,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ - struct prio_tree_root i_mmap; /* tree of private and shared mappings */ + struct rb_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ @@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag); */ static inline int mapping_mapped(struct address_space *mapping) { - return !prio_tree_empty(&mapping->i_mmap) || + return !RB_EMPTY_ROOT(&mapping->i_mmap) || !list_empty(&mapping->i_mmap_nonlinear); } diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h new file mode 100644 index 000000000000..c65deda31413 --- /dev/null +++ b/include/linux/interval_tree_tmpl.h @@ -0,0 +1,215 @@ +/* + Interval Trees + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + include/linux/interval_tree_tmpl.h +*/ + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoing of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + */ + +/* IT(name) -> ITPREFIX_name */ +#define _ITNAME(prefix, name) prefix ## _ ## name +#define ITNAME(prefix, name) _ITNAME(prefix, name) +#define IT(name) ITNAME(ITPREFIX, name) + +/* Callbacks for augmented rbtree insert and remove */ + +static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node) +{ + ITTYPE max = ITLAST(node), subtree_last; + if (node->ITRB.rb_left) { + subtree_last = rb_entry(node->ITRB.rb_left, + ITSTRUCT, ITRB)->ITSUBTREE; + if (max < subtree_last) + max = subtree_last; + } + if (node->ITRB.rb_right) { + subtree_last = rb_entry(node->ITRB.rb_right, + ITSTRUCT, ITRB)->ITSUBTREE; + if (max < subtree_last) + max = subtree_last; + } + return max; +} + +static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop) +{ + while (rb != stop) { + ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB); + ITTYPE subtree_last = IT(compute_subtree_last)(node); + if (node->ITSUBTREE == subtree_last) + break; + node->ITSUBTREE = subtree_last; + rb = rb_parent(&node->ITRB); + } +} + +static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new) +{ + ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB); + ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB); + + new->ITSUBTREE = old->ITSUBTREE; +} + +static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new) +{ + ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB); + ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB); + + new->ITSUBTREE = old->ITSUBTREE; + old->ITSUBTREE = IT(compute_subtree_last)(old); +} + +static const struct rb_augment_callbacks IT(augment_callbacks) = { + IT(augment_propagate), IT(augment_copy), IT(augment_rotate) +}; + +/* Insert / remove interval nodes from the tree */ + +ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root) +{ + struct rb_node **link = &root->rb_node, *rb_parent = NULL; + ITTYPE start = ITSTART(node), last = ITLAST(node); + ITSTRUCT *parent; + + while (*link) { + rb_parent = *link; + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); + if (parent->ITSUBTREE < last) + parent->ITSUBTREE = last; + if (start < ITSTART(parent)) + link = &parent->ITRB.rb_left; + else + link = &parent->ITRB.rb_right; + } + + node->ITSUBTREE = last; + rb_link_node(&node->ITRB, rb_parent, link); + rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks)); +} + +ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root) +{ + rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks)); +} + +/* + * Iterate over intervals intersecting [start;last] + * + * Note that a node's interval intersects [start;last] iff: + * Cond1: ITSTART(node) <= last + * and + * Cond2: start <= ITLAST(node) + */ + +static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last) +{ + while (true) { + /* + * Loop invariant: start <= node->ITSUBTREE + * (Cond2 is satisfied by one of the subtree nodes) + */ + if (node->ITRB.rb_left) { + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, + ITSTRUCT, ITRB); + if (start <= left->ITSUBTREE) { + /* + * Some nodes in left subtree satisfy Cond2. + * Iterate to find the leftmost such node N. + * If it also satisfies Cond1, that's the match + * we are looking for. Otherwise, there is no + * matching interval as nodes to the right of N + * can't satisfy Cond1 either. + */ + node = left; + continue; + } + } + if (ITSTART(node) <= last) { /* Cond1 */ + if (start <= ITLAST(node)) /* Cond2 */ + return node; /* node is leftmost match */ + if (node->ITRB.rb_right) { + node = rb_entry(node->ITRB.rb_right, + ITSTRUCT, ITRB); + if (start <= node->ITSUBTREE) + continue; + } + } + return NULL; /* No match */ + } +} + +ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root, + ITTYPE start, ITTYPE last) +{ + ITSTRUCT *node; + + if (!root->rb_node) + return NULL; + node = rb_entry(root->rb_node, ITSTRUCT, ITRB); + if (node->ITSUBTREE < start) + return NULL; + return IT(subtree_search)(node, start, last); +} + +ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last) +{ + struct rb_node *rb = node->ITRB.rb_right, *prev; + + while (true) { + /* + * Loop invariants: + * Cond1: ITSTART(node) <= last + * rb == node->ITRB.rb_right + * + * First, search right subtree if suitable + */ + if (rb) { + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); + if (start <= right->ITSUBTREE) + return IT(subtree_search)(right, start, last); + } + + /* Move up the tree until we come from a node's left child */ + do { + rb = rb_parent(&node->ITRB); + if (!rb) + return NULL; + prev = &node->ITRB; + node = rb_entry(rb, ITSTRUCT, ITRB); + rb = node->ITRB.rb_right; + } while (prev == rb); + + /* Check if the node intersects [start;last] */ + if (last < ITSTART(node)) /* !Cond1 */ + return NULL; + else if (start <= ITLAST(node)) /* Cond2 */ + return node; + } +} diff --git a/include/linux/mm.h b/include/linux/mm.h index 5ddb11b2b4bb..0f671ef09eba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone); extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); -/* prio_tree.c */ -void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); -void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); -void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *); -struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, - struct prio_tree_iter *iter); - -#define vma_prio_tree_foreach(vma, iter, root, begin, end) \ - for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \ - (vma = vma_prio_tree_next(vma, iter)); ) +/* interval_tree.c */ +void vma_interval_tree_add(struct vm_area_struct *vma, + struct vm_area_struct *old, + struct address_space *mapping); +void vma_interval_tree_insert(struct vm_area_struct *node, + struct rb_root *root); +void vma_interval_tree_remove(struct vm_area_struct *node, + struct rb_root *root); +struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, + unsigned long start, unsigned long last); +struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, + unsigned long start, unsigned long last); + +#define vma_interval_tree_foreach(vma, root, start, last) \ + for (vma = vma_interval_tree_iter_first(root, start, last); \ + vma; vma = vma_interval_tree_iter_next(vma, start, last)) static inline void vma_nonlinear_insert(struct vm_area_struct *vma, struct list_head *list) { - vma->shared.vm_set.parent = NULL; - list_add_tail(&vma->shared.vm_set.list, list); + list_add_tail(&vma->shared.nonlinear, list); } /* mmap.c */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index a57a43f5ca7c..31f8a3af7d94 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -240,18 +239,15 @@ struct vm_area_struct { /* * For areas with an address space and backing store, - * linkage into the address_space->i_mmap prio tree, or - * linkage to the list of like vmas hanging off its node, or + * linkage into the address_space->i_mmap interval tree, or * linkage of vma in the address_space->i_mmap_nonlinear list. */ union { struct { - struct list_head list; - void *parent; /* aligns with prio_tree_node parent */ - struct vm_area_struct *head; - } vm_set; - - struct raw_prio_tree_node prio_tree_node; + struct rb_node rb; + unsigned long rb_subtree_last; + } linear; + struct list_head nonlinear; } shared; /* diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 912ef48d28ab..1d9c0a985960 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -735,7 +735,6 @@ static struct map_info * build_map_info(struct address_space *mapping, loff_t offset, bool is_register) { unsigned long pgoff = offset >> PAGE_SHIFT; - struct prio_tree_iter iter; struct vm_area_struct *vma; struct map_info *curr = NULL; struct map_info *prev = NULL; @@ -744,7 +743,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) again: mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; diff --git a/kernel/fork.c b/kernel/fork.c index 972762e01024..90dace52715e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -423,7 +423,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mapping->i_mmap_writable++; flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ - vma_prio_tree_add(tmp, mpnt); + vma_interval_tree_add(tmp, mpnt, mapping); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); } diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 6fd540b1e499..77a793e0644b 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -1,159 +1,13 @@ #include #include -/* Callbacks for augmented rbtree insert and remove */ - -static inline unsigned long -compute_subtree_last(struct interval_tree_node *node) -{ - unsigned long max = node->last, subtree_last; - if (node->rb.rb_left) { - subtree_last = rb_entry(node->rb.rb_left, - struct interval_tree_node, rb)->__subtree_last; - if (max < subtree_last) - max = subtree_last; - } - if (node->rb.rb_right) { - subtree_last = rb_entry(node->rb.rb_right, - struct interval_tree_node, rb)->__subtree_last; - if (max < subtree_last) - max = subtree_last; - } - return max; -} - -RB_DECLARE_CALLBACKS(static, augment_callbacks, struct interval_tree_node, rb, - unsigned long, __subtree_last, compute_subtree_last) - -/* Insert / remove interval nodes from the tree */ - -void interval_tree_insert(struct interval_tree_node *node, - struct rb_root *root) -{ - struct rb_node **link = &root->rb_node, *rb_parent = NULL; - unsigned long start = node->start, last = node->last; - struct interval_tree_node *parent; - - while (*link) { - rb_parent = *link; - parent = rb_entry(rb_parent, struct interval_tree_node, rb); - if (parent->__subtree_last < last) - parent->__subtree_last = last; - if (start < parent->start) - link = &parent->rb.rb_left; - else - link = &parent->rb.rb_right; - } - - node->__subtree_last = last; - rb_link_node(&node->rb, rb_parent, link); - rb_insert_augmented(&node->rb, root, &augment_callbacks); -} - -void interval_tree_remove(struct interval_tree_node *node, - struct rb_root *root) -{ - rb_erase_augmented(&node->rb, root, &augment_callbacks); -} - -/* - * Iterate over intervals intersecting [start;last] - * - * Note that a node's interval intersects [start;last] iff: - * Cond1: node->start <= last - * and - * Cond2: start <= node->last - */ - -static struct interval_tree_node * -subtree_search(struct interval_tree_node *node, - unsigned long start, unsigned long last) -{ - while (true) { - /* - * Loop invariant: start <= node->__subtree_last - * (Cond2 is satisfied by one of the subtree nodes) - */ - if (node->rb.rb_left) { - struct interval_tree_node *left = - rb_entry(node->rb.rb_left, - struct interval_tree_node, rb); - if (start <= left->__subtree_last) { - /* - * Some nodes in left subtree satisfy Cond2. - * Iterate to find the leftmost such node N. - * If it also satisfies Cond1, that's the match - * we are looking for. Otherwise, there is no - * matching interval as nodes to the right of N - * can't satisfy Cond1 either. - */ - node = left; - continue; - } - } - if (node->start <= last) { /* Cond1 */ - if (start <= node->last) /* Cond2 */ - return node; /* node is leftmost match */ - if (node->rb.rb_right) { - node = rb_entry(node->rb.rb_right, - struct interval_tree_node, rb); - if (start <= node->__subtree_last) - continue; - } - } - return NULL; /* No match */ - } -} - -struct interval_tree_node * -interval_tree_iter_first(struct rb_root *root, - unsigned long start, unsigned long last) -{ - struct interval_tree_node *node; - - if (!root->rb_node) - return NULL; - node = rb_entry(root->rb_node, struct interval_tree_node, rb); - if (node->__subtree_last < start) - return NULL; - return subtree_search(node, start, last); -} - -struct interval_tree_node * -interval_tree_iter_next(struct interval_tree_node *node, - unsigned long start, unsigned long last) -{ - struct rb_node *rb = node->rb.rb_right, *prev; - - while (true) { - /* - * Loop invariants: - * Cond1: node->start <= last - * rb == node->rb.rb_right - * - * First, search right subtree if suitable - */ - if (rb) { - struct interval_tree_node *right = - rb_entry(rb, struct interval_tree_node, rb); - if (start <= right->__subtree_last) - return subtree_search(right, start, last); - } - - /* Move up the tree until we come from a node's left child */ - do { - rb = rb_parent(&node->rb); - if (!rb) - return NULL; - prev = &node->rb; - node = rb_entry(rb, struct interval_tree_node, rb); - rb = node->rb.rb_right; - } while (prev == rb); - - /* Check if the node intersects [start;last] */ - if (last < node->start) /* !Cond1 */ - return NULL; - else if (start <= node->last) /* Cond2 */ - return node; - } -} +#define ITSTRUCT struct interval_tree_node +#define ITRB rb +#define ITTYPE unsigned long +#define ITSUBTREE __subtree_last +#define ITSTART(n) ((n)->start) +#define ITLAST(n) ((n)->last) +#define ITSTATIC +#define ITPREFIX interval_tree + +#include diff --git a/lib/prio_tree.c b/lib/prio_tree.c index 4e0d2edff2b4..bba37148c15e 100644 --- a/lib/prio_tree.c +++ b/lib/prio_tree.c @@ -44,27 +44,12 @@ * The following macros are used for implementing prio_tree for i_mmap */ -#define RADIX_INDEX(vma) ((vma)->vm_pgoff) -#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT) -/* avoid overflow */ -#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1)) - - static void get_index(const struct prio_tree_root *root, const struct prio_tree_node *node, unsigned long *radix, unsigned long *heap) { - if (root->raw) { - struct vm_area_struct *vma = prio_tree_entry( - node, struct vm_area_struct, shared.prio_tree_node); - - *radix = RADIX_INDEX(vma); - *heap = HEAP_INDEX(vma); - } - else { - *radix = node->start; - *heap = node->last; - } + *radix = node->start; + *heap = node->last; } static unsigned long index_bits_to_maxindex[BITS_PER_LONG]; diff --git a/mm/Makefile b/mm/Makefile index 92753e2d82da..6b025f80af34 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -14,9 +14,9 @@ endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ - prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ + util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o $(mmu-y) + compaction.o interval_tree.o $(mmu-y) obj-y += init-mm.o diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 91750227a191..a52daee11d3f 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -167,7 +167,6 @@ __xip_unmap (struct address_space * mapping, { struct vm_area_struct *vma; struct mm_struct *mm; - struct prio_tree_iter iter; unsigned long address; pte_t *pte; pte_t pteval; @@ -184,7 +183,7 @@ __xip_unmap (struct address_space * mapping, retry: mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { mm = vma->vm_mm; address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); diff --git a/mm/fremap.c b/mm/fremap.c index 3d731a498788..3899a86851ce 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -214,7 +214,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); vma->vm_flags |= VM_NONLINEAR; - vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_interval_tree_remove(vma, &mapping->i_mmap); vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f1bb534254f6..c9b40e3a9936 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2474,7 +2474,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); struct vm_area_struct *iter_vma; struct address_space *mapping; - struct prio_tree_iter iter; pgoff_t pgoff; /* @@ -2491,7 +2490,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * __unmap_hugepage_range() is called as the lock is already held */ mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) { /* Do not unmap the current VMA */ if (iter_vma == vma) continue; diff --git a/mm/interval_tree.c b/mm/interval_tree.c new file mode 100644 index 000000000000..7dc565660e56 --- /dev/null +++ b/mm/interval_tree.c @@ -0,0 +1,61 @@ +/* + * mm/interval_tree.c - interval tree for mapping->i_mmap + * + * Copyright (C) 2012, Michel Lespinasse + * + * This file is released under the GPL v2. + */ + +#include +#include + +#define ITSTRUCT struct vm_area_struct +#define ITRB shared.linear.rb +#define ITTYPE unsigned long +#define ITSUBTREE shared.linear.rb_subtree_last +#define ITSTART(n) ((n)->vm_pgoff) +#define ITLAST(n) ((n)->vm_pgoff + \ + (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1) +#define ITSTATIC +#define ITPREFIX vma_interval_tree + +#include + +/* Insert old immediately after vma in the interval tree */ +void vma_interval_tree_add(struct vm_area_struct *vma, + struct vm_area_struct *old, + struct address_space *mapping) +{ + struct rb_node **link; + struct vm_area_struct *parent; + unsigned long last; + + if (unlikely(vma->vm_flags & VM_NONLINEAR)) { + list_add(&vma->shared.nonlinear, &old->shared.nonlinear); + return; + } + + last = ITLAST(vma); + + if (!old->shared.linear.rb.rb_right) { + parent = old; + link = &old->shared.linear.rb.rb_right; + } else { + parent = rb_entry(old->shared.linear.rb.rb_right, + struct vm_area_struct, shared.linear.rb); + if (parent->shared.linear.rb_subtree_last < last) + parent->shared.linear.rb_subtree_last = last; + while (parent->shared.linear.rb.rb_left) { + parent = rb_entry(parent->shared.linear.rb.rb_left, + struct vm_area_struct, shared.linear.rb); + if (parent->shared.linear.rb_subtree_last < last) + parent->shared.linear.rb_subtree_last = last; + } + link = &parent->shared.linear.rb.rb_left; + } + + vma->shared.linear.rb_subtree_last = last; + rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link); + rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap, + &vma_interval_tree_augment_callbacks); +} diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a6e2141a6610..c38a6257d082 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -431,7 +431,6 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, { struct vm_area_struct *vma; struct task_struct *tsk; - struct prio_tree_iter iter; struct address_space *mapping = page->mapping; mutex_lock(&mapping->i_mmap_mutex); @@ -442,7 +441,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, if (!task_early_kill(tsk)) continue; - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { /* * Send early kill signal to tasks where a vma covers diff --git a/mm/memory.c b/mm/memory.c index e09c04813186..d205e4381a34 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2801,14 +2801,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma, zap_page_range_single(vma, start_addr, end_addr - start_addr, details); } -static inline void unmap_mapping_range_tree(struct prio_tree_root *root, +static inline void unmap_mapping_range_tree(struct rb_root *root, struct zap_details *details) { struct vm_area_struct *vma; - struct prio_tree_iter iter; pgoff_t vba, vea, zba, zea; - vma_prio_tree_foreach(vma, &iter, root, + vma_interval_tree_foreach(vma, root, details->first_index, details->last_index) { vba = vma->vm_pgoff; @@ -2839,7 +2838,7 @@ static inline void unmap_mapping_range_list(struct list_head *head, * across *all* the pages in each nonlinear VMA, not just the pages * whose virtual address lies outside the file truncation point. */ - list_for_each_entry(vma, head, shared.vm_set.list) { + list_for_each_entry(vma, head, shared.nonlinear) { details->nonlinear_vma = vma; unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details); } @@ -2883,7 +2882,7 @@ void unmap_mapping_range(struct address_space *mapping, mutex_lock(&mapping->i_mmap_mutex); - if (unlikely(!prio_tree_empty(&mapping->i_mmap))) + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); diff --git a/mm/mmap.c b/mm/mmap.c index e3c365ff1b6a..5ac533f88e99 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -199,14 +199,14 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) - list_del_init(&vma->shared.vm_set.list); + list_del_init(&vma->shared.nonlinear); else - vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_interval_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } /* - * Unlink a file-based vm structure from its prio_tree, to hide + * Unlink a file-based vm structure from its interval tree, to hide * vma from rmap and vmtruncate before freeing its page tables. */ void unlink_file_vma(struct vm_area_struct *vma) @@ -411,7 +411,7 @@ static void __vma_link_file(struct vm_area_struct *vma) if (unlikely(vma->vm_flags & VM_NONLINEAR)) vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); else - vma_prio_tree_insert(vma, &mapping->i_mmap); + vma_interval_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } } @@ -449,7 +449,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, /* * Helper for vma_adjust() in the split_vma insert case: insert a vma into the - * mm's list and rbtree. It has already been inserted into the prio_tree. + * mm's list and rbtree. It has already been inserted into the interval tree. */ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { @@ -491,7 +491,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, struct vm_area_struct *next = vma->vm_next; struct vm_area_struct *importer = NULL; struct address_space *mapping = NULL; - struct prio_tree_root *root = NULL; + struct rb_root *root = NULL; struct anon_vma *anon_vma = NULL; struct file *file = vma->vm_file; long adjust_next = 0; @@ -554,7 +554,7 @@ again: remove_next = 1 + (end > next->vm_end); mutex_lock(&mapping->i_mmap_mutex); if (insert) { /* - * Put into prio_tree now, so instantiated pages + * Put into interval tree now, so instantiated pages * are visible to arm/parisc __flush_dcache_page * throughout; but we cannot insert into address * space until vma start or end is updated. @@ -582,9 +582,9 @@ again: remove_next = 1 + (end > next->vm_end); if (root) { flush_dcache_mmap_lock(mapping); - vma_prio_tree_remove(vma, root); + vma_interval_tree_remove(vma, root); if (adjust_next) - vma_prio_tree_remove(next, root); + vma_interval_tree_remove(next, root); } vma->vm_start = start; @@ -597,8 +597,8 @@ again: remove_next = 1 + (end > next->vm_end); if (root) { if (adjust_next) - vma_prio_tree_insert(next, root); - vma_prio_tree_insert(vma, root); + vma_interval_tree_insert(next, root); + vma_interval_tree_insert(vma, root); flush_dcache_mmap_unlock(mapping); } diff --git a/mm/nommu.c b/mm/nommu.c index 12e84e69dd06..45131b41bcdb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -698,7 +698,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); - vma_prio_tree_insert(vma, &mapping->i_mmap); + vma_interval_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); } @@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); - vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_interval_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); } @@ -2044,7 +2044,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, size_t newsize) { struct vm_area_struct *vma; - struct prio_tree_iter iter; struct vm_region *region; pgoff_t low, high; size_t r_size, r_top; @@ -2056,8 +2055,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, mutex_lock(&inode->i_mapping->i_mmap_mutex); /* search for VMAs that fall within the dead zone */ - vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, - low, high) { + vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { /* found one - only interested if it's shared out of the page * cache */ if (vma->vm_flags & VM_SHARED) { @@ -2073,8 +2071,8 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, * we don't check for any regions that start beyond the EOF as there * shouldn't be any */ - vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, - 0, ULONG_MAX) { + vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, + 0, ULONG_MAX) { if (!(vma->vm_flags & VM_SHARED)) continue; diff --git a/mm/prio_tree.c b/mm/prio_tree.c deleted file mode 100644 index 799dcfd7cd8c..000000000000 --- a/mm/prio_tree.c +++ /dev/null @@ -1,208 +0,0 @@ -/* - * mm/prio_tree.c - priority search tree for mapping->i_mmap - * - * Copyright (C) 2004, Rajesh Venkatasubramanian - * - * This file is released under the GPL v2. - * - * Based on the radix priority search tree proposed by Edward M. McCreight - * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985 - * - * 02Feb2004 Initial version - */ - -#include -#include -#include - -/* - * See lib/prio_tree.c for details on the general radix priority search tree - * code. - */ - -/* - * The following #defines are mirrored from lib/prio_tree.c. They're only used - * for debugging, and should be removed (along with the debugging code using - * them) when switching also VMAs to the regular prio_tree code. - */ - -#define RADIX_INDEX(vma) ((vma)->vm_pgoff) -#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT) -/* avoid overflow */ -#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1)) - -/* - * Radix priority search tree for address_space->i_mmap - * - * For each vma that map a unique set of file pages i.e., unique [radix_index, - * heap_index] value, we have a corresponding priority search tree node. If - * multiple vmas have identical [radix_index, heap_index] value, then one of - * them is used as a tree node and others are stored in a vm_set list. The tree - * node points to the first vma (head) of the list using vm_set.head. - * - * prio_tree_root - * | - * A vm_set.head - * / \ / - * L R -> H-I-J-K-M-N-O-P-Q-S - * ^ ^ <-- vm_set.list --> - * tree nodes - * - * We need some way to identify whether a vma is a tree node, head of a vm_set - * list, or just a member of a vm_set list. We cannot use vm_flags to store - * such information. The reason is, in the above figure, it is possible that - * vm_flags' of R and H are covered by the different mmap_sems. When R is - * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold - * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now. - * That's why some trick involving shared.vm_set.parent is used for identifying - * tree nodes and list head nodes. - * - * vma radix priority search tree node rules: - * - * vma->shared.vm_set.parent != NULL ==> a tree node - * vma->shared.vm_set.head != NULL ==> list of others mapping same range - * vma->shared.vm_set.head == NULL ==> no others map the same range - * - * vma->shared.vm_set.parent == NULL - * vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range - * vma->shared.vm_set.head == NULL ==> a list node - */ - -/* - * Add a new vma known to map the same set of pages as the old vma: - * useful for fork's dup_mmap as well as vma_prio_tree_insert below. - * Note that it just happens to work correctly on i_mmap_nonlinear too. - */ -void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old) -{ - /* Leave these BUG_ONs till prio_tree patch stabilizes */ - BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old)); - BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old)); - - vma->shared.vm_set.head = NULL; - vma->shared.vm_set.parent = NULL; - - if (!old->shared.vm_set.parent) - list_add(&vma->shared.vm_set.list, - &old->shared.vm_set.list); - else if (old->shared.vm_set.head) - list_add_tail(&vma->shared.vm_set.list, - &old->shared.vm_set.head->shared.vm_set.list); - else { - INIT_LIST_HEAD(&vma->shared.vm_set.list); - vma->shared.vm_set.head = old; - old->shared.vm_set.head = vma; - } -} - -void vma_prio_tree_insert(struct vm_area_struct *vma, - struct prio_tree_root *root) -{ - struct prio_tree_node *ptr; - struct vm_area_struct *old; - - vma->shared.vm_set.head = NULL; - - ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node); - if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) { - old = prio_tree_entry(ptr, struct vm_area_struct, - shared.prio_tree_node); - vma_prio_tree_add(vma, old); - } -} - -void vma_prio_tree_remove(struct vm_area_struct *vma, - struct prio_tree_root *root) -{ - struct vm_area_struct *node, *head, *new_head; - - if (!vma->shared.vm_set.head) { - if (!vma->shared.vm_set.parent) - list_del_init(&vma->shared.vm_set.list); - else - raw_prio_tree_remove(root, &vma->shared.prio_tree_node); - } else { - /* Leave this BUG_ON till prio_tree patch stabilizes */ - BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma); - if (vma->shared.vm_set.parent) { - head = vma->shared.vm_set.head; - if (!list_empty(&head->shared.vm_set.list)) { - new_head = list_entry( - head->shared.vm_set.list.next, - struct vm_area_struct, - shared.vm_set.list); - list_del_init(&head->shared.vm_set.list); - } else - new_head = NULL; - - raw_prio_tree_replace(root, &vma->shared.prio_tree_node, - &head->shared.prio_tree_node); - head->shared.vm_set.head = new_head; - if (new_head) - new_head->shared.vm_set.head = head; - - } else { - node = vma->shared.vm_set.head; - if (!list_empty(&vma->shared.vm_set.list)) { - new_head = list_entry( - vma->shared.vm_set.list.next, - struct vm_area_struct, - shared.vm_set.list); - list_del_init(&vma->shared.vm_set.list); - node->shared.vm_set.head = new_head; - new_head->shared.vm_set.head = node; - } else - node->shared.vm_set.head = NULL; - } - } -} - -/* - * Helper function to enumerate vmas that map a given file page or a set of - * contiguous file pages. The function returns vmas that at least map a single - * page in the given range of contiguous file pages. - */ -struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, - struct prio_tree_iter *iter) -{ - struct prio_tree_node *ptr; - struct vm_area_struct *next; - - if (!vma) { - /* - * First call is with NULL vma - */ - ptr = prio_tree_next(iter); - if (ptr) { - next = prio_tree_entry(ptr, struct vm_area_struct, - shared.prio_tree_node); - prefetch(next->shared.vm_set.head); - return next; - } else - return NULL; - } - - if (vma->shared.vm_set.parent) { - if (vma->shared.vm_set.head) { - next = vma->shared.vm_set.head; - prefetch(next->shared.vm_set.list.next); - return next; - } - } else { - next = list_entry(vma->shared.vm_set.list.next, - struct vm_area_struct, shared.vm_set.list); - if (!next->shared.vm_set.head) { - prefetch(next->shared.vm_set.list.next); - return next; - } - } - - ptr = prio_tree_next(iter); - if (ptr) { - next = prio_tree_entry(ptr, struct vm_area_struct, - shared.prio_tree_node); - prefetch(next->shared.vm_set.head); - return next; - } else - return NULL; -} diff --git a/mm/rmap.c b/mm/rmap.c index 0f3b7cda2a24..7b5b51d25fc5 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -820,7 +820,6 @@ static int page_referenced_file(struct page *page, struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int referenced = 0; /* @@ -846,7 +845,7 @@ static int page_referenced_file(struct page *page, */ mapcount = page_mapcount(page); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; @@ -945,13 +944,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int ret = 0; BUG_ON(PageAnon(page)); mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (vma->vm_flags & VM_SHARED) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) @@ -1547,7 +1545,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int ret = SWAP_AGAIN; unsigned long cursor; unsigned long max_nl_cursor = 0; @@ -1555,7 +1552,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) unsigned int mapcount; mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; @@ -1576,7 +1573,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) goto out; list_for_each_entry(vma, &mapping->i_mmap_nonlinear, - shared.vm_set.list) { + shared.nonlinear) { cursor = (unsigned long) vma->vm_private_data; if (cursor > max_nl_cursor) max_nl_cursor = cursor; @@ -1608,7 +1605,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) do { list_for_each_entry(vma, &mapping->i_mmap_nonlinear, - shared.vm_set.list) { + shared.nonlinear) { cursor = (unsigned long) vma->vm_private_data; while ( cursor < max_nl_cursor && cursor < vma->vm_end - vma->vm_start) { @@ -1631,7 +1628,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) * in locked vmas). Reset cursor on all unreserved nonlinear * vmas, now forgetting on which ones it had fallen behind. */ - list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) vma->vm_private_data = NULL; out: mutex_unlock(&mapping->i_mmap_mutex); @@ -1748,13 +1745,12 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int ret = SWAP_AGAIN; if (!mapping) return ret; mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; -- cgit v1.2.3 From 38a76013ad809beb0b52f60d365c960d035bd83c Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:31:50 -0700 Subject: mm: avoid taking rmap locks in move_ptes() During mremap(), the destination VMA is generally placed after the original vma in rmap traversal order: in move_vma(), we always have new_pgoff >= vma->vm_pgoff, and as a result new_vma->vm_pgoff >= vma->vm_pgoff unless vma_merge() merged the new vma with an adjacent one. When the destination VMA is placed after the original in rmap traversal order, we can avoid taking the rmap locks in move_ptes(). Essentially, this reintroduces the optimization that had been disabled in "mm anon rmap: remove anon_vma_moveto_tail". The difference is that we don't try to impose the rmap traversal order; instead we just rely on things being in the desired order in the common case and fall back to taking locks in the uncommon case. Also we skip the i_mmap_mutex in addition to the anon_vma lock: in both cases, the vmas are traversed in increasing vm_pgoff order with ties resolved in tree insertion order. Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/mm.h | 6 ++++-- mm/mmap.c | 7 +++++-- mm/mremap.c | 57 +++++++++++++++++++++++++++++++++++++----------------- 4 files changed, 49 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 19f4fb80cd17..4f2bebc276c5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -603,7 +603,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * process cleanup to remove whatever mess we made. */ if (length != move_page_tables(vma, old_start, - vma, new_start, length)) + vma, new_start, length, false)) return -ENOMEM; lru_add_drain(); diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e6f9c9f2123..0d5f823ce3fc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1060,7 +1060,8 @@ vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len); + unsigned long new_addr, unsigned long len, + bool need_rmap_locks); extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); @@ -1410,7 +1411,8 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, - unsigned long addr, unsigned long len, pgoff_t pgoff); + unsigned long addr, unsigned long len, pgoff_t pgoff, + bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); extern int mm_take_all_locks(struct mm_struct *mm); diff --git a/mm/mmap.c b/mm/mmap.c index 81248992120d..2d942353d681 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2371,7 +2371,8 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) * prior to moving page table entries, to effect an mremap move. */ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, - unsigned long addr, unsigned long len, pgoff_t pgoff) + unsigned long addr, unsigned long len, pgoff_t pgoff, + bool *need_rmap_locks) { struct vm_area_struct *vma = *vmap; unsigned long vma_start = vma->vm_start; @@ -2413,8 +2414,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, * linear if there are no pages mapped yet. */ VM_BUG_ON(faulted_in_anon_vma); - *vmap = new_vma; + *vmap = vma = new_vma; } + *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff); } else { new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (new_vma) { @@ -2434,6 +2436,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); + *need_rmap_locks = false; } } return new_vma; diff --git a/mm/mremap.c b/mm/mremap.c index 5588bb6e9295..3b639a4b26bd 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -71,26 +71,42 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr) + unsigned long new_addr, bool need_rmap_locks) { struct address_space *mapping = NULL; - struct anon_vma *anon_vma = vma->anon_vma; + struct anon_vma *anon_vma = NULL; struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; - if (vma->vm_file) { - /* - * Subtle point from Rajesh Venkatasubramanian: before - * moving file-based ptes, we must lock truncate_pagecache - * out, since it might clean the dst vma before the src vma, - * and we propagate stale pages into the dst afterward. - */ - mapping = vma->vm_file->f_mapping; - mutex_lock(&mapping->i_mmap_mutex); + /* + * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma + * locks to ensure that rmap will always observe either the old or the + * new ptes. This is the easiest way to avoid races with + * truncate_pagecache(), page migration, etc... + * + * When need_rmap_locks is false, we use other ways to avoid + * such races: + * + * - During exec() shift_arg_pages(), we use a specially tagged vma + * which rmap call sites look for using is_vma_temporary_stack(). + * + * - During mremap(), new_vma is often known to be placed after vma + * in rmap traversal order. This ensures rmap will always observe + * either the old pte, or the new pte, or both (the page table locks + * serialize access to individual ptes, but only rmap traversal + * order guarantees that we won't miss both the old and new ptes). + */ + if (need_rmap_locks) { + if (vma->vm_file) { + mapping = vma->vm_file->f_mapping; + mutex_lock(&mapping->i_mmap_mutex); + } + if (vma->anon_vma) { + anon_vma = vma->anon_vma; + anon_vma_lock(anon_vma); + } } - if (anon_vma) - anon_vma_lock(anon_vma); /* * We don't have to worry about the ordering of src and dst @@ -127,7 +143,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len) + unsigned long new_addr, unsigned long len, + bool need_rmap_locks) { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; @@ -174,7 +191,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (extent > LATENCY_LIMIT) extent = LATENCY_LIMIT; move_ptes(vma, old_pmd, old_addr, old_addr + extent, - new_vma, new_pmd, new_addr); + new_vma, new_pmd, new_addr, need_rmap_locks); need_flush = true; } if (likely(need_flush)) @@ -198,6 +215,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, unsigned long hiwater_vm; int split = 0; int err; + bool need_rmap_locks; /* * We'd prefer to avoid failure later on in do_munmap: @@ -219,18 +237,21 @@ static unsigned long move_vma(struct vm_area_struct *vma, return err; new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); - new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff); + new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff, + &need_rmap_locks); if (!new_vma) return -ENOMEM; - moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len); + moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len, + need_rmap_locks); if (moved_len < old_len) { /* * On error, move entries back from new area to old, * which will succeed since page tables still there, * and then proceed to unmap new area instead of old. */ - move_page_tables(new_vma, new_addr, vma, old_addr, moved_len); + move_page_tables(new_vma, new_addr, vma, old_addr, moved_len, + true); vma = new_vma; old_len = new_len; old_addr = new_addr; -- cgit v1.2.3 From cd8ed2a45a401cb692d769e92de7d73aa42fabce Mon Sep 17 00:00:00 2001 From: Yan Hong Date: Mon, 8 Oct 2012 16:33:45 -0700 Subject: fs/fs-writeback.c: remove unneccesary parameter of __writeback_single_inode() The parameter 'wb' is never used in this function. Signed-off-by: Yan Hong Acked-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8e1d7b9e4a33..401b6c6248ae 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -439,8 +439,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * setting I_SYNC flag and calling inode_sync_complete() to clear it. */ static int -__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, - struct writeback_control *wbc) +__writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; long nr_to_write = wbc->nr_to_write; @@ -527,7 +526,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, inode->i_state |= I_SYNC; spin_unlock(&inode->i_lock); - ret = __writeback_single_inode(inode, wb, wbc); + ret = __writeback_single_inode(inode, wbc); spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); @@ -670,7 +669,7 @@ static long writeback_sb_inodes(struct super_block *sb, * We use I_SYNC to pin the inode in memory. While it is set * evict_inode() will wait so the inode cannot be freed. */ - __writeback_single_inode(inode, wb, &wbc); + __writeback_single_inode(inode, &wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write; -- cgit v1.2.3 From 7a71932d5676b7410ab64d149bad8bde6b0d8632 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 8 Oct 2012 16:33:47 -0700 Subject: kpageflags: fix wrong KPF_THP on non-huge compound pages KPF_THP can be set on non-huge compound pages (like slab pages or pages allocated by drivers with __GFP_COMP) because PageTransCompound only checks PG_head and PG_tail. Obviously this is a bug and breaks user space applications which look for thp via /proc/kpageflags. This patch rules out setting KPF_THP wrongly by additionally checking PageLRU on the head pages. Signed-off-by: Naoya Horiguchi Acked-by: KOSAKI Motohiro Acked-by: David Rientjes Reviewed-by: Fengguang Wu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/page.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/page.c b/fs/proc/page.c index 7fcd0d60a968..b8730d9ebaee 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -115,7 +115,13 @@ u64 stable_page_flags(struct page *page) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; - else if (PageTransCompound(page)) + /* + * PageTransCompound can be true for non-huge compound pages (slab + * pages or pages allocated by drivers with __GFP_COMP) because it + * just checks PG_head/PG_tail, so we need to check PageLRU to make + * sure a given page is a thp, not a non-huge compound page. + */ + else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) u |= 1 << KPF_THP; /* -- cgit v1.2.3