summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/afs/fs_probe.c | 4
-rw-r--r-- fs/afs/server.c | 2
-rw-r--r-- fs/binfmt_elf.c | 299
-rw-r--r-- fs/binfmt_elf_fdpic.c | 7
-rw-r--r-- fs/binfmt_misc.c | 8
-rw-r--r-- fs/btrfs/ctree.c | 36
-rw-r--r-- fs/btrfs/ioctl.c | 23
-rw-r--r-- fs/btrfs/qgroup.c | 9
-rw-r--r-- fs/btrfs/send.c | 24
-rw-r--r-- fs/btrfs/sysfs.c | 7
-rw-r--r-- fs/btrfs/tree-log.c | 59
-rw-r--r-- fs/btrfs/zoned.c | 9
-rw-r--r-- fs/ceph/inode.c | 2
-rw-r--r-- fs/ceph/locks.c | 4
-rw-r--r-- fs/ceph/mdsmap.c | 2
-rw-r--r-- fs/cifs/cifsfs.c | 4
-rw-r--r-- fs/cifs/file.c | 2
-rw-r--r-- fs/cifs/sess.c | 4
-rw-r--r-- fs/coredump.c | 6
-rw-r--r-- fs/debugfs/file.c | 28
-rw-r--r-- fs/exec.c | 34
-rw-r--r-- fs/ext2/ialloc.c | 2
-rw-r--r-- fs/ext4/extents.c | 18
-rw-r--r-- fs/ext4/ialloc.c | 2
-rw-r--r-- fs/ext4/mmp.c | 8
-rw-r--r-- fs/ext4/super.c | 5
-rw-r--r-- fs/f2fs/gc.c | 2
-rw-r--r-- fs/f2fs/segment.c | 8
-rw-r--r-- fs/fat/nfs.c | 4
-rw-r--r-- fs/file.c | 11
-rw-r--r-- fs/fs-writeback.c | 30
-rw-r--r-- fs/fscache/cookie.c | 8
-rw-r--r-- fs/fscache/volume.c | 7
-rw-r--r-- fs/fuse/file.c | 37
-rw-r--r-- fs/hfs/inode.c | 2
-rw-r--r-- fs/hfs/trans.c | 2
-rw-r--r-- fs/hfsplus/hfsplus_fs.h | 2
-rw-r--r-- fs/hfsplus/inode.c | 4
-rw-r--r-- fs/hfsplus/options.c | 4
-rw-r--r-- fs/ksmbd/vfs.c | 8
-rw-r--r-- fs/libfs.c | 22
-rw-r--r-- fs/lockd/svcsubs.c | 4
-rw-r--r-- fs/locks.c | 50
-rw-r--r-- fs/namei.c | 3
-rw-r--r-- fs/nfs/delegation.c | 2
-rw-r--r-- fs/nfs/nfs4state.c | 2
-rw-r--r-- fs/nfs/pagelist.c | 2
-rw-r--r-- fs/nfs/write.c | 4
-rw-r--r-- fs/nfsd/nfs4state.c | 6
-rw-r--r-- fs/nfsd/vfs.c | 11
-rw-r--r-- fs/nilfs2/dat.c | 7
-rw-r--r-- fs/nilfs2/sufile.c | 8
-rw-r--r-- fs/nilfs2/the_nilfs.c | 73
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c | 38
-rw-r--r-- fs/ocfs2/cluster/heartbeat.h | 2
-rw-r--r-- fs/ocfs2/cluster/netdebug.c | 2
-rw-r--r-- fs/ocfs2/cluster/nodemanager.c | 2
-rw-r--r-- fs/ocfs2/cluster/tcp.c | 6
-rw-r--r-- fs/ocfs2/dlm/dlmcommon.h | 2
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c | 19
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c | 30
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c | 2
-rw-r--r-- fs/ocfs2/journal.c | 2
-rw-r--r-- fs/ocfs2/journal.h | 1
-rw-r--r-- fs/ocfs2/ocfs2.h | 3
-rw-r--r-- fs/ocfs2/stack_o2cb.c | 6
-rw-r--r-- fs/ocfs2/stackglue.c | 8
-rw-r--r-- fs/ocfs2/super.c | 5
-rw-r--r-- fs/proc/cmdline.c | 6
-rw-r--r-- fs/proc/consoles.c | 21
-rw-r--r-- fs/proc/fd.c | 45
-rw-r--r-- fs/proc/meminfo.c | 2
-rw-r--r-- fs/proc/vmcore.c | 1
-rw-r--r-- fs/pstore/platform.c | 25
-rw-r--r-- fs/pstore/ram.c | 44
-rw-r--r-- fs/pstore/ram_core.c | 20
-rw-r--r-- fs/pstore/ram_internal.h | 98
-rw-r--r-- fs/pstore/zone.c | 2
-rw-r--r-- fs/read_write.c | 19
-rw-r--r-- fs/squashfs/Kconfig | 51
-rw-r--r-- fs/squashfs/block.c | 2
-rw-r--r-- fs/squashfs/decompressor.c | 2
-rw-r--r-- fs/squashfs/decompressor_multi.c | 20
-rw-r--r-- fs/squashfs/decompressor_multi_percpu.c | 23
-rw-r--r-- fs/squashfs/decompressor_single.c | 15
-rw-r--r-- fs/squashfs/squashfs.h | 23
-rw-r--r-- fs/squashfs/squashfs_fs_sb.h | 4
-rw-r--r-- fs/squashfs/super.c | 100
-rw-r--r-- fs/ubifs/debug.c | 8
-rw-r--r-- fs/ubifs/lpt_commit.c | 14
-rw-r--r-- fs/ubifs/tnc_commit.c | 2
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c | 2
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.c | 2
-rw-r--r-- fs/xfs/xfs_error.c | 2
-rw-r--r-- fs/zonefs/super.c | 23
-rw-r--r-- fs/zonefs/zonefs.h | 6
96 files changed, 1071 insertions, 570 deletions
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index c0031a3ab42f..3ac5fcf98d0d 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -167,8 +167,8 @@ responded:
clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
}
- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
- rtt_us < server->probe.rtt) {
+ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 4981baf97835..b5237206eac3 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server,
if (!server)
return;
- a = atomic_inc_return(&server->active);
+ a = atomic_read(&server->active);
zero = __refcount_dec_and_test(&server->ref, &r);
trace_afs_server(debug_id, r - 1, a, reason);
if (unlikely(zero))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6a11025e5850..de63572a9404 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -248,7 +248,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
} while (0)
#ifdef ARCH_DLINFO
- /*
+ /*
* ARCH_DLINFO must come first so PPC can do its special alignment of
* AUXV.
* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
@@ -456,13 +456,13 @@ static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
*
* Loads ELF program headers from the binary file elf_file, which has the ELF
* header pointed to by elf_ex, into a newly allocated array. The caller is
- * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
+ * responsible for freeing the allocated data. Returns NULL upon failure.
*/
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
struct file *elf_file)
{
struct elf_phdr *elf_phdata = NULL;
- int retval, err = -1;
+ int retval = -1;
unsigned int size;
/*
@@ -484,15 +484,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
/* Read in the program headers */
retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
- if (retval < 0) {
- err = retval;
- goto out;
- }
- /* Success! */
- err = 0;
out:
- if (err) {
+ if (retval) {
kfree(elf_phdata);
elf_phdata = NULL;
}
@@ -1020,7 +1014,7 @@ out_free_interp:
executable_stack);
if (retval < 0)
goto out_free_dentry;
-
+
elf_bss = 0;
elf_brk = 0;
@@ -1043,7 +1037,7 @@ out_free_interp:
if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
-
+
/* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
@@ -1166,7 +1160,7 @@ out_free_interp:
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
- retval = IS_ERR((void *)error) ?
+ retval = IS_ERR_VALUE(error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
@@ -1251,7 +1245,7 @@ out_free_interp:
interpreter,
load_bias, interp_elf_phdata,
&arch_state);
- if (!IS_ERR((void *)elf_entry)) {
+ if (!IS_ERR_VALUE(elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
@@ -1260,7 +1254,7 @@ out_free_interp:
elf_entry += interp_elf_ex->e_entry;
}
if (BAD_ADDR(elf_entry)) {
- retval = IS_ERR((void *)elf_entry) ?
+ retval = IS_ERR_VALUE(elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
@@ -1521,7 +1515,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
phdr->p_align = 0;
}
-static void fill_note(struct memelfnote *note, const char *name, int type,
+static void fill_note(struct memelfnote *note, const char *name, int type,
unsigned int sz, void *data)
{
note->name = name;
@@ -1724,7 +1718,6 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm
return 0;
}
-#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>
struct elf_thread_core_info {
@@ -1745,6 +1738,7 @@ struct elf_note_info {
int thread_notes;
};
+#ifdef CORE_DUMP_USE_REGSET
/*
* When a regset has a writeback hook, we call it on each thread before
* dumping user memory. On register window machines, this makes sure the
@@ -1824,34 +1818,58 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
return 1;
}
+#else
+static int fill_thread_core_info(struct elf_thread_core_info *t,
+ const struct user_regset_view *view,
+ long signr, struct elf_note_info *info)
+{
+ struct task_struct *p = t->task;
+ elf_fpregset_t *fpu;
+
+ fill_prstatus(&t->prstatus.common, p, signr);
+ elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
+
+ fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
+ &(t->prstatus));
+ info->size += notesize(&t->notes[0]);
+
+ fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
+ if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
+ kfree(fpu);
+ return 1;
+ }
+
+ t->prstatus.pr_fpvalid = 1;
+ fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
+ info->size += notesize(&t->notes[1]);
+
+ return 1;
+}
+#endif
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
struct coredump_params *cprm)
{
struct task_struct *dump_task = current;
- const struct user_regset_view *view = task_user_regset_view(dump_task);
+ const struct user_regset_view *view;
struct elf_thread_core_info *t;
struct elf_prpsinfo *psinfo;
struct core_thread *ct;
- unsigned int i;
-
- info->size = 0;
- info->thread = NULL;
psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
- if (psinfo == NULL) {
- info->psinfo.data = NULL; /* So we don't free this wrongly */
+ if (!psinfo)
return 0;
- }
-
fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+#ifdef CORE_DUMP_USE_REGSET
+ view = task_user_regset_view(dump_task);
+
/*
* Figure out how many notes we're going to need for each thread.
*/
info->thread_notes = 0;
- for (i = 0; i < view->n; ++i)
+ for (int i = 0; i < view->n; ++i)
if (view->regsets[i].core_note_type != 0)
++info->thread_notes;
@@ -1870,11 +1888,23 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
*/
fill_elf_header(elf, phdrs,
view->e_machine, view->e_flags);
+#else
+ view = NULL;
+ info->thread_notes = 2;
+ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
+#endif
/*
* Allocate a structure for each thread.
*/
- for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
+ info->thread = kzalloc(offsetof(struct elf_thread_core_info,
+ notes[info->thread_notes]),
+ GFP_KERNEL);
+ if (unlikely(!info->thread))
+ return 0;
+
+ info->thread->task = dump_task;
+ for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
t = kzalloc(offsetof(struct elf_thread_core_info,
notes[info->thread_notes]),
GFP_KERNEL);
@@ -1882,17 +1912,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
return 0;
t->task = ct->task;
- if (ct->task == dump_task || !info->thread) {
- t->next = info->thread;
- info->thread = t;
- } else {
- /*
- * Make sure to keep the original task at
- * the head of the list.
- */
- t->next = info->thread->next;
- info->thread->next = t;
- }
+ t->next = info->thread->next;
+ info->thread->next = t;
}
/*
@@ -1920,11 +1941,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
return 1;
}
-static size_t get_note_info_size(struct elf_note_info *info)
-{
- return info->size;
-}
-
/*
* Write all the notes for each thread. When writing the first thread, the
* process-wide notes are interleaved after the first thread-specific note.
@@ -1979,197 +1995,6 @@ static void free_note_info(struct elf_note_info *info)
kvfree(info->files.data);
}
-#else
-
-/* Here is the structure in which status of each thread is captured. */
-struct elf_thread_status
-{
- struct list_head list;
- struct elf_prstatus prstatus; /* NT_PRSTATUS */
- elf_fpregset_t fpu; /* NT_PRFPREG */
- struct task_struct *thread;
- struct memelfnote notes[3];
- int num_notes;
-};
-
-/*
- * In order to add the specific thread information for the elf file format,
- * we need to keep a linked list of every threads pr_status and then create
- * a single section for them in the final core file.
- */
-static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
-{
- int sz = 0;
- struct task_struct *p = t->thread;
- t->num_notes = 0;
-
- fill_prstatus(&t->prstatus.common, p, signr);
- elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
-
- fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
- &(t->prstatus));
- t->num_notes++;
- sz += notesize(&t->notes[0]);
-
- if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
- &t->fpu))) {
- fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
- &(t->fpu));
- t->num_notes++;
- sz += notesize(&t->notes[1]);
- }
- return sz;
-}
-
-struct elf_note_info {
- struct memelfnote *notes;
- struct memelfnote *notes_files;
- struct elf_prstatus *prstatus; /* NT_PRSTATUS */
- struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
- struct list_head thread_list;
- elf_fpregset_t *fpu;
- user_siginfo_t csigdata;
- int thread_status_size;
- int numnote;
-};
-
-static int elf_note_info_init(struct elf_note_info *info)
-{
- memset(info, 0, sizeof(*info));
- INIT_LIST_HEAD(&info->thread_list);
-
- /* Allocate space for ELF notes */
- info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
- if (!info->notes)
- return 0;
- info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
- if (!info->psinfo)
- return 0;
- info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
- if (!info->prstatus)
- return 0;
- info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
- if (!info->fpu)
- return 0;
- return 1;
-}
-
-static int fill_note_info(struct elfhdr *elf, int phdrs,
- struct elf_note_info *info,
- struct coredump_params *cprm)
-{
- struct core_thread *ct;
- struct elf_thread_status *ets;
-
- if (!elf_note_info_init(info))
- return 0;
-
- for (ct = current->signal->core_state->dumper.next;
- ct; ct = ct->next) {
- ets = kzalloc(sizeof(*ets), GFP_KERNEL);
- if (!ets)
- return 0;
-
- ets->thread = ct->task;
- list_add(&ets->list, &info->thread_list);
- }
-
- list_for_each_entry(ets, &info->thread_list, list) {
- int sz;
-
- sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
- info->thread_status_size += sz;
- }
- /* now collect the dump for the current */
- memset(info->prstatus, 0, sizeof(*info->prstatus));
- fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo);
- elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
-
- /* Set up header */
- fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
-
- /*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
- */
-
- fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
- sizeof(*info->prstatus), info->prstatus);
- fill_psinfo(info->psinfo, current->group_leader, current->mm);
- fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
- sizeof(*info->psinfo), info->psinfo);
-
- fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
- fill_auxv_note(info->notes + 3, current->mm);
- info->numnote = 4;
-
- if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
- info->notes_files = info->notes + info->numnote;
- info->numnote++;
- }
-
- /* Try to dump the FPU. */
- info->prstatus->pr_fpvalid =
- elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
- if (info->prstatus->pr_fpvalid)
- fill_note(info->notes + info->numnote++,
- "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
- return 1;
-}
-
-static size_t get_note_info_size(struct elf_note_info *info)
-{
- int sz = 0;
- int i;
-
- for (i = 0; i < info->numnote; i++)
- sz += notesize(info->notes + i);
-
- sz += info->thread_status_size;
-
- return sz;
-}
-
-static int write_note_info(struct elf_note_info *info,
- struct coredump_params *cprm)
-{
- struct elf_thread_status *ets;
- int i;
-
- for (i = 0; i < info->numnote; i++)
- if (!writenote(info->notes + i, cprm))
- return 0;
-
- /* write out the thread status notes section */
- list_for_each_entry(ets, &info->thread_list, list) {
- for (i = 0; i < ets->num_notes; i++)
- if (!writenote(&ets->notes[i], cprm))
- return 0;
- }
-
- return 1;
-}
-
-static void free_note_info(struct elf_note_info *info)
-{
- while (!list_empty(&info->thread_list)) {
- struct list_head *tmp = info->thread_list.next;
- list_del(tmp);
- kfree(list_entry(tmp, struct elf_thread_status, list));
- }
-
- /* Free data possibly allocated by fill_files_note(): */
- if (info->notes_files)
- kvfree(info->notes_files->data);
-
- kfree(info->prstatus);
- kfree(info->psinfo);
- kfree(info->notes);
- kfree(info->fpu);
-}
-
-#endif
-
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
elf_addr_t e_shoff, int segs)
{
@@ -2233,7 +2058,7 @@ static int elf_core_dump(struct coredump_params *cprm)
/* Write notes phdr entry */
{
- size_t sz = get_note_info_size(&info);
+ size_t sz = info.size;
/* For cell spufs */
sz += elf_coredump_extra_notes_size();
@@ -2295,7 +2120,7 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
- /* write out the notes section */
+ /* write out the notes section */
if (!write_note_info(&info, cprm))
goto end_coredump;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 08d0c8797828..096e3520a0b1 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
current->mm->start_stack = current->mm->start_brk + stack_size;
#endif
- if (create_elf_fdpic_tables(bprm, current->mm,
- &exec_params, &interp_params) < 0)
+ retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params,
+ &interp_params);
+ if (retval < 0)
goto error;
kdebug("- start_code %lx", current->mm->start_code);
@@ -1603,7 +1604,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
- /* write out the notes section */
+ /* write out the notes section */
if (!writenote(thread_list->notes, cprm))
goto end_coredump;
if (!writenote(&psinfo_note, cprm))
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1eae7ea823a..bb202ad369d5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -44,10 +44,10 @@ static LIST_HEAD(entries);
static int enabled = 1;
enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
-#define MISC_FMT_OPEN_BINARY (1 << 30)
-#define MISC_FMT_CREDENTIALS (1 << 29)
-#define MISC_FMT_OPEN_FILE (1 << 28)
+#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
+#define MISC_FMT_OPEN_BINARY (1UL << 30)
+#define MISC_FMT_CREDENTIALS (1UL << 29)
+#define MISC_FMT_OPEN_FILE (1UL << 28)
typedef struct {
struct list_head list;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a9543f01184c..dcb510f38dda 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -4663,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
int ret;
int i;
- ASSERT(!path->nowait);
+ /*
+ * The nowait semantics are used only for write paths, where we don't
+ * use the tree mod log and sequence numbers.
+ */
+ if (time_seq)
+ ASSERT(!path->nowait);
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
@@ -4683,7 +4688,14 @@ again:
if (path->need_commit_sem) {
path->need_commit_sem = 0;
need_commit_sem = true;
- down_read(&fs_info->commit_root_sem);
+ if (path->nowait) {
+ if (!down_read_trylock(&fs_info->commit_root_sem)) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ } else {
+ down_read(&fs_info->commit_root_sem);
+ }
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
}
@@ -4759,7 +4771,7 @@ again:
next = c;
ret = read_block_for_search(root, path, &next, level,
slot, &key);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN && !path->nowait)
goto again;
if (ret < 0) {
@@ -4769,6 +4781,10 @@ again:
if (!path->skip_locking) {
ret = btrfs_try_tree_read_lock(next);
+ if (!ret && path->nowait) {
+ ret = -EAGAIN;
+ goto done;
+ }
if (!ret && time_seq) {
/*
* If we don't get the lock, we may be racing
@@ -4799,7 +4815,7 @@ again:
ret = read_block_for_search(root, path, &next, level,
0, &key);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN && !path->nowait)
goto again;
if (ret < 0) {
@@ -4807,8 +4823,16 @@ again:
goto done;
}
- if (!path->skip_locking)
- btrfs_tree_read_lock(next);
+ if (!path->skip_locking) {
+ if (path->nowait) {
+ if (!btrfs_try_tree_read_lock(next)) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ } else {
+ btrfs_tree_read_lock(next);
+ }
+ }
}
ret = 0;
done:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a59c884c2cb0..f897be9ec1e9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
}
}
+ btrfs_free_path(path);
+ path = NULL;
if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
ret = -EFAULT;
@@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
}
out:
+ btrfs_free_path(path);
+
if (!ret || ret == -EOVERFLOW) {
rootrefs->num_items = found;
/* update min_treeid for next search */
@@ -3205,7 +3209,6 @@ out:
}
kfree(rootrefs);
- btrfs_free_path(path);
return ret;
}
@@ -4231,6 +4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
ipath->fspath->val[i] = rel_ptr;
}
+ btrfs_free_path(path);
+ path = NULL;
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
ipath->fspath, size);
if (ret) {
@@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
size = min_t(u32, loi->size, SZ_16M);
}
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
inodes = init_data_container(size);
if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes);
- inodes = NULL;
- goto out;
+ goto out_loi;
}
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
inodes, ignore_offset);
+ btrfs_free_path(path);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
@@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
ret = -EFAULT;
out:
- btrfs_free_path(path);
kvfree(inodes);
out_loi:
kfree(loi);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9334c3157c22..b74105a10f16 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;
- ret = update_qgroup_limit_item(trans, dstgroup);
- if (ret) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_info(fs_info,
- "unable to update quota limit for %llu",
- dstgroup->qgroupid);
- goto unlock;
- }
+ qgroup_dirty(fs_info, dstgroup);
}
if (srcid) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 145c84b44fd0..1c4b693ee4a3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5702,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
u64 ext_len;
u64 clone_len;
u64 clone_data_offset;
+ bool crossed_src_i_size = false;
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(clone_root->root, path);
@@ -5759,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
if (key.offset >= clone_src_i_size)
break;
- if (key.offset + ext_len > clone_src_i_size)
+ if (key.offset + ext_len > clone_src_i_size) {
ext_len = clone_src_i_size - key.offset;
+ crossed_src_i_size = true;
+ }
clone_data_offset = btrfs_file_extent_offset(leaf, ei);
if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
@@ -5821,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = send_clone(sctx, offset, clone_len,
clone_root);
}
+ } else if (crossed_src_i_size && clone_len < len) {
+ /*
+ * If we are at i_size of the clone source inode and we
+ * can not clone from it, terminate the loop. This is
+ * to avoid sending two write operations, one with a
+ * length matching clone_len and the final one after
+ * this loop with a length of len - clone_len.
+ *
+ * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
+ * was passed to the send ioctl), this helps avoid
+ * sending an encoded write for an offset that is not
+ * sector size aligned, in case the i_size of the source
+ * inode is not sector size aligned. That will make the
+ * receiver fall back to decompression of the data and
+ * writing it using regular buffered IO, therefore while
+ * not incorrect, it's not optimal due to decompression and
+ * possible re-compression at the receiver.
+ */
+ break;
} else {
ret = send_extent_data(sctx, dst_path, offset,
clone_len);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 699b54b3acaa..74fef1f49c35 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void)
#ifdef CONFIG_BTRFS_DEBUG
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group);
- if (ret)
- goto out2;
+ if (ret) {
+ sysfs_unmerge_group(&btrfs_kset->kobj,
+ &btrfs_static_feature_attr_group);
+ goto out_remove_group;
+ }
#endif
return 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 813986e38258..c3cf3dabe0b1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
u64 *last_old_dentry_offset)
{
struct btrfs_root *log = inode->root->log_root;
- struct extent_buffer *src = path->nodes[0];
- const int nritems = btrfs_header_nritems(src);
+ struct extent_buffer *src;
+ const int nritems = btrfs_header_nritems(path->nodes[0]);
const u64 ino = btrfs_ino(inode);
bool last_found = false;
int batch_start = 0;
int batch_size = 0;
int i;
- for (i = path->slots[0]; i < nritems; i++) {
+ /*
+ * We need to clone the leaf, release the read lock on it, and use the
+ * clone before modifying the log tree. See the comment at copy_items()
+ * about why we need to do this.
+ */
+ src = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!src)
+ return -ENOMEM;
+
+ i = path->slots[0];
+ btrfs_release_path(path);
+ path->nodes[0] = src;
+ path->slots[0] = i;
+
+ for (; i < nritems; i++) {
struct btrfs_dir_item *di;
struct btrfs_key key;
int ret;
@@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
- struct extent_buffer *src = src_path->nodes[0];
+ struct extent_buffer *src;
int ret = 0;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
@@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
const u64 i_size = i_size_read(&inode->vfs_inode);
+ /*
+ * To keep lockdep happy and avoid deadlocks, clone the source leaf and
+ * use the clone. This is because otherwise we would be changing the log
+ * tree, to insert items from the subvolume tree or insert csum items,
+ * while holding a read lock on a leaf from the subvolume tree, which
+ * creates a nasty lock dependency when COWing log tree nodes/leaves:
+ *
+ * 1) Modifying the log tree triggers an extent buffer allocation while
+ * holding a write lock on a parent extent buffer from the log tree.
+ * Allocating the pages for an extent buffer, or the extent buffer
+ * struct, can trigger inode eviction and finally the inode eviction
+ * will trigger a release/remove of a delayed node, which requires
+ * taking the delayed node's mutex;
+ *
+ * 2) Allocating a metadata extent for a log tree can trigger the async
+ * reclaim thread and make us wait for it to release enough space and
+ * unblock our reservation ticket. The reclaim thread can start
+ * flushing delayed items, and that in turn results in the need to
+ * lock delayed node mutexes and in the need to write lock extent
+ * buffers of a subvolume tree - all this while holding a write lock
+ * on the parent extent buffer in the log tree.
+ *
+ * So one task in scenario 1) running in parallel with another task in
+ * scenario 2) could lead to a deadlock, one wanting to lock a delayed
+ * node mutex while having a read lock on a leaf from the subvolume,
+ * while the other is holding the delayed node's mutex and wants to
+ * write lock the same subvolume leaf for flushing delayed items.
+ */
+ src = btrfs_clone_extent_buffer(src_path->nodes[0]);
+ if (!src)
+ return -ENOMEM;
+
+ i = src_path->slots[0];
+ btrfs_release_path(src_path);
+ src_path->nodes[0] = src;
+ src_path->slots[0] = i;
+
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
if (!ins_data)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1912abf6d020..c9e2b0c85309 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
super[i] = page_address(page[i]);
}
- if (super[0]->generation > super[1]->generation)
+ if (btrfs_super_generation(super[0]) >
+ btrfs_super_generation(super[1]))
sector = zones[1].start;
else
sector = zones[0].start;
@@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
goto out;
}
- zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
+ zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
if (!zones) {
ret = -ENOMEM;
goto out;
@@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
- kfree(zones);
+ kvfree(zones);
switch (bdev_zoned_model(bdev)) {
case BLK_ZONED_HM:
@@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
return 0;
out:
- kfree(zones);
+ kvfree(zones);
out_free_zone_info:
btrfs_destroy_dev_zone_info(device);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bad9eeb6a1a5..d53399966a2d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -362,7 +362,7 @@ static int ceph_fill_fragtree(struct inode *inode,
if (nsplits != ci->i_fragtree_nsplits) {
update = true;
} else if (nsplits) {
- i = prandom_u32_max(nsplits);
+ i = get_random_u32_below(nsplits);
id = le32_to_cpu(fragtree->splits[i].frag);
if (!__ceph_find_frag(ci, id))
update = true;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 3e2843e86e27..f3b461c708a8 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -364,7 +364,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
*fcntl_count = 0;
*flock_count = 0;
- ctx = inode->i_flctx;
+ ctx = locks_inode_context(inode);
if (ctx) {
spin_lock(&ctx->flc_lock);
list_for_each_entry(lock, &ctx->flc_posix, fl_list)
@@ -418,7 +418,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
int num_fcntl_locks, int num_flock_locks)
{
struct file_lock *lock;
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(inode);
int err = 0;
int seen_fcntl = 0;
int seen_flock = 0;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 3fbabc98e1f7..7dac21ee6ce7 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -29,7 +29,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
return -1;
/* pick */
- n = prandom_u32_max(n);
+ n = get_random_u32_below(n);
for (j = 0, i = 0; i < m->possible_max_rank; i++) {
if (CEPH_MDS_IS_READY(i, ignore_laggy))
j++;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index fe220686bba4..712a43161448 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1281,7 +1281,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
off + len - 1);
if (rc)
- goto out;
+ goto unlock;
/* should we flush first and last page first */
truncate_inode_pages(&target_inode->i_data, 0);
@@ -1297,6 +1297,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
* that target is updated on the server
*/
CIFS_I(target_inode)->time = 0;
+
+unlock:
/* although unlocking in the reverse order from locking is not
* strictly necessary here it is a little cleaner to be consistent
*/
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 209dfc06fd6d..87b56b1ae117 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1413,7 +1413,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
struct inode *inode = d_inode(cfile->dentry);
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct file_lock *flock;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
unsigned int count = 0, i;
int rc = 0, xid, type;
struct list_head locks_to_send, *el;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 92e4278ec35d..9e7d9f0baa18 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
/* now drop the ref to the current iface */
if (old_iface && iface) {
- kref_put(&old_iface->refcount, release_iface);
cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
&old_iface->sockaddr,
&iface->sockaddr);
- } else if (old_iface) {
kref_put(&old_iface->refcount, release_iface);
+ } else if (old_iface) {
cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
&old_iface->sockaddr);
+ kref_put(&old_iface->refcount, release_iface);
} else {
WARN_ON(!iface);
cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr);
diff --git a/fs/coredump.c b/fs/coredump.c
index 095ed821c8ac..9a745d08c57f 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -325,6 +325,10 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
err = cn_printf(cn, "%lu",
rlimit(RLIMIT_CORE));
break;
+ /* CPU the task ran on */
+ case 'C':
+ err = cn_printf(cn, "%d", cprm->cpu);
+ break;
default:
break;
}
@@ -525,7 +529,6 @@ void do_coredump(const kernel_siginfo_t *siginfo)
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.siginfo = siginfo,
- .regs = signal_pt_regs(),
.limit = rlimit(RLIMIT_CORE),
/*
* We must use the same mm->flags while dumping core to avoid
@@ -534,6 +537,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
*/
.mm_flags = mm->flags,
.vma_meta = NULL,
+ .cpu = raw_smp_processor_id(),
};
audit_core_dumps(siginfo->si_signo);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ddb3fc258df9..b54f470e0d03 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -378,8 +378,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf,
}
EXPORT_SYMBOL_GPL(debugfs_attr_read);
-ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
+static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos, bool is_signed)
{
struct dentry *dentry = F_DENTRY(file);
ssize_t ret;
@@ -387,12 +387,28 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
ret = debugfs_file_get(dentry);
if (unlikely(ret))
return ret;
- ret = simple_attr_write(file, buf, len, ppos);
+ if (is_signed)
+ ret = simple_attr_write_signed(file, buf, len, ppos);
+ else
+ ret = simple_attr_write(file, buf, len, ppos);
debugfs_file_put(dentry);
return ret;
}
+
+ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return debugfs_attr_write_xsigned(file, buf, len, ppos, false);
+}
EXPORT_SYMBOL_GPL(debugfs_attr_write);
+ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return debugfs_attr_write_xsigned(file, buf, len, ppos, true);
+}
+EXPORT_SYMBOL_GPL(debugfs_attr_write_signed);
+
static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode,
struct dentry *parent, void *value,
const struct file_operations *fops,
@@ -738,11 +754,11 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
*val = atomic_read((atomic_t *)data);
return 0;
}
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get,
debugfs_atomic_t_set, "%lld\n");
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
"%lld\n");
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
"%lld\n");
/**
diff --git a/fs/exec.c b/fs/exec.c
index a0b1f0337a62..089a743f636b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -64,6 +64,7 @@
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/coredump.h>
+#include <linux/time_namespace.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -171,7 +172,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
exit:
fput(file);
out:
- return error;
+ return error;
}
#endif /* #ifdef CONFIG_USELIB */
@@ -199,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
struct page *page;
int ret;
- unsigned int gup_flags = FOLL_FORCE;
+ unsigned int gup_flags = 0;
#ifdef CONFIG_STACK_GROWSUP
if (write) {
@@ -842,16 +843,13 @@ int setup_arg_pages(struct linux_binprm *bprm,
* will align it up.
*/
rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
+
+ stack_expand = min(rlim_stack, stack_size + stack_expand);
+
#ifdef CONFIG_STACK_GROWSUP
- if (stack_size + stack_expand > rlim_stack)
- stack_base = vma->vm_start + rlim_stack;
- else
- stack_base = vma->vm_end + stack_expand;
+ stack_base = vma->vm_start + stack_expand;
#else
- if (stack_size + stack_expand > rlim_stack)
- stack_base = vma->vm_end - rlim_stack;
- else
- stack_base = vma->vm_start - stack_expand;
+ stack_base = vma->vm_end - stack_expand;
#endif
current->mm->start_stack = bprm->p;
ret = expand_stack(vma, stack_base);
@@ -1297,6 +1295,10 @@ int begin_new_exec(struct linux_binprm * bprm)
bprm->mm = NULL;
+ retval = exec_task_namespaces();
+ if (retval)
+ goto out_unlock;
+
#ifdef CONFIG_POSIX_TIMERS
spin_lock_irq(&me->sighand->siglock);
posix_cpu_timers_exit(me);
@@ -1568,6 +1570,12 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
+ /*
+ * If another task is sharing our fs, we cannot safely
+ * suid exec because the differently privileged task
+ * will be able to manipulate the current directory, etc.
+ * It would be nice to force an unshare instead...
+ */
t = p;
n_fs = 1;
spin_lock(&p->fs->lock);
@@ -1748,6 +1756,7 @@ static int search_binary_handler(struct linux_binprm *bprm)
return retval;
}
+/* binfmt handlers will call back into begin_new_exec() on success. */
static int exec_binprm(struct linux_binprm *bprm)
{
pid_t old_pid, old_vpid;
@@ -1806,6 +1815,11 @@ static int bprm_execve(struct linux_binprm *bprm,
if (retval)
return retval;
+ /*
+ * Check for unsafe execution states before exec_binprm(), which
+ * will call back into begin_new_exec(), into bprm_creds_from_file(),
+ * where setuid-ness is evaluated.
+ */
check_unsafe_exec(bprm);
current->in_execve = 1;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index f4944c4dee60..78b8686d9a4a 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -277,7 +277,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int best_ndir = inodes_per_group;
int best_group = -1;
- parent_group = prandom_u32_max(ngroups);
+ parent_group = get_random_u32_below(ngroups);
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext2_get_group_desc (sb, group, NULL);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f1956288307f..6c399a8b22b3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* and it is decreased till we reach start.
*/
again:
+ ret = 0;
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
@@ -5227,14 +5228,21 @@ again:
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- if (le32_to_cpu(extent->ee_block) > 0)
+ if (le32_to_cpu(extent->ee_block) > start)
*iterator = le32_to_cpu(extent->ee_block) - 1;
- else
- /* Beginning is reached, end of the loop */
+ else if (le32_to_cpu(extent->ee_block) == start)
iterator = NULL;
- /* Update path extent in case we need to stop */
- while (le32_to_cpu(extent->ee_block) < start)
+ else {
+ extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+ while (le32_to_cpu(extent->ee_block) >= start)
+ extent--;
+
+ if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
+ break;
+
extent++;
+ iterator = NULL;
+ }
path[depth].p_ext = extent;
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index e9bc46684106..9fc1af8e19a3 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -465,7 +465,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo);
parent_group = hinfo.hash % ngroups;
} else
- parent_group = prandom_u32_max(ngroups);
+ parent_group = get_random_u32_below(ngroups);
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
get_orlov_stats(sb, g, flex_size, &stats);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 588cb09c5291..4681fff6665f 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -262,13 +262,7 @@ void ext4_stop_mmpd(struct ext4_sb_info *sbi)
*/
static unsigned int mmp_new_seq(void)
{
- u32 new_seq;
-
- do {
- new_seq = get_random_u32();
- } while (new_seq > EXT4_MMP_SEQ_MAX);
-
- return new_seq;
+ return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1);
}
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7cdd2138c897..63ef74eb8091 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3778,7 +3778,7 @@ cont_thread:
}
if (!progress) {
elr->lr_next_sched = jiffies +
- prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ);
+ get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
}
if (time_before(elr->lr_next_sched, next_wakeup))
next_wakeup = elr->lr_next_sched;
@@ -3925,8 +3925,7 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
* spread the inode table initialization requests
* better.
*/
- elr->lr_next_sched = jiffies + prandom_u32_max(
- EXT4_DEF_LI_MAX_START_DELAY * HZ);
+ elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
return elr;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 4546e01b2ee0..536d332d9e2e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -282,7 +282,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
/* let's select beginning hot/small space first in no_heap mode*/
if (f2fs_need_rand_seg(sbi))
- p->offset = prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);
+ p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
else if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
p->offset = 0;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index acf3d3fa4363..b304692c0cf5 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2534,7 +2534,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
sanity_check_seg_type(sbi, seg_type);
if (f2fs_need_rand_seg(sbi))
- return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);
+ return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
@@ -2588,7 +2588,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
- prandom_u32_max(sbi->max_fragment_chunk) + 1;
+ get_random_u32_inclusive(1, sbi->max_fragment_chunk);
}
static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2625,9 +2625,9 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
/* To allocate block chunks in different sizes, use random number */
if (--seg->fragment_remained_chunk <= 0) {
seg->fragment_remained_chunk =
- prandom_u32_max(sbi->max_fragment_chunk) + 1;
+ get_random_u32_inclusive(1, sbi->max_fragment_chunk);
seg->next_blkoff +=
- prandom_u32_max(sbi->max_fragment_hole) + 1;
+ get_random_u32_inclusive(1, sbi->max_fragment_hole);
}
}
}
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index af191371c352..3626eb585a98 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -17,7 +17,7 @@ struct fat_fid {
#define FAT_FID_SIZE_WITHOUT_PARENT 3
#define FAT_FID_SIZE_WITH_PARENT (sizeof(struct fat_fid)/sizeof(u32))
-/**
+/*
* Look up a directory inode given its starting cluster.
*/
static struct inode *fat_dget(struct super_block *sb, int i_logstart)
@@ -135,7 +135,7 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp,
return type;
}
-/**
+/*
* Map a NFS file handle to a corresponding dentry.
* The dentry may or may not be connected to the filesystem root.
*/
diff --git a/fs/file.c b/fs/file.c
index 5f9c802a5d8d..c942c89ca4cd 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
struct files_struct *files = current->files;
struct file *file;
- if (atomic_read(&files->count) == 1) {
+ /*
+ * If another thread is concurrently calling close_fd() followed
+ * by put_files_struct(), we must not observe the old table
+ * entry combined with the new refcount - otherwise we could
+ * return a file that is concurrently being freed.
+ *
+ * atomic_read_acquire() pairs with atomic_dec_and_test() in
+ * put_files_struct().
+ */
+ if (atomic_read_acquire(&files->count) == 1) {
file = files_lookup_fd_raw(files, fd);
if (!file || unlikely(file->f_mode & mask))
return 0;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 443f83382b9b..9958d4020771 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode,
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
- * If the inode is now fully clean, then it can be safely removed from
- * its writeback list (if any). Otherwise the flusher threads are
- * responsible for the writeback lists.
+ * If the inode is freeing, its i_io_list shouldn't be updated
+ * as it can be finally deleted at this moment.
*/
- if (!(inode->i_state & I_DIRTY_ALL))
- inode_cgwb_move_to_attached(inode, wb);
- else if (!(inode->i_state & I_SYNC_QUEUED)) {
- if ((inode->i_state & I_DIRTY))
- redirty_tail_locked(inode, wb);
- else if (inode->i_state & I_DIRTY_TIME) {
- inode->dirtied_when = jiffies;
- inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ if (!(inode->i_state & I_FREEING)) {
+ /*
+ * If the inode is now fully clean, then it can be safely
+ * removed from its writeback list (if any). Otherwise the
+ * flusher threads are responsible for the writeback lists.
+ */
+ if (!(inode->i_state & I_DIRTY_ALL))
+ inode_cgwb_move_to_attached(inode, wb);
+ else if (!(inode->i_state & I_SYNC_QUEUED)) {
+ if ((inode->i_state & I_DIRTY))
+ redirty_tail_locked(inode, wb);
+ else if (inode->i_state & I_DIRTY_TIME) {
+ inode->dirtied_when = jiffies;
+ inode_io_list_move_locked(inode,
+ wb,
+ &wb->b_dirty_time);
+ }
}
}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 451d8a077e12..bce2492186d0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -605,6 +605,14 @@ again:
set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags);
queue = true;
}
+ /*
+ * We could race with cookie_lru which may set LRU_DISCARD bit
+ * but has yet to run the cookie state machine. If this happens
+ * and another thread tries to use the cookie, clear LRU_DISCARD
+ * so we don't end up withdrawing the cookie while in use.
+ */
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags))
+ fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear);
break;
case FSCACHE_COOKIE_STATE_FAILED:
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index a058e0136bfe..ab8ceddf9efa 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
struct fscache_volume *volume;
struct fscache_cache *cache;
size_t klen, hlen;
- char *key;
+ u8 *key;
+
+ klen = strlen(volume_key);
+ if (klen > NAME_MAX)
+ return NULL;
if (!coherency_data)
coherency_len = 0;
@@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
/* Stick the length on the front of the key and pad it out to make
* hashing easier.
*/
- klen = strlen(volume_key);
hlen = round_up(1 + klen + 1, sizeof(__le32));
key = kzalloc(hlen, GFP_KERNEL);
if (!key)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 71bfb663aac5..89f4741728ba 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
.mode = mode
};
int err;
- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
- (mode & (FALLOC_FL_PUNCH_HOLE |
- FALLOC_FL_ZERO_RANGE));
-
- bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
+ bool block_faults = FUSE_IS_DAX(inode) &&
+ (!(mode & FALLOC_FL_KEEP_SIZE) ||
+ (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)));
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_ZERO_RANGE))
@@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (fm->fc->no_fallocate)
return -EOPNOTSUPP;
- if (lock_inode) {
- inode_lock(inode);
- if (block_faults) {
- filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
- if (err)
- goto out;
- }
+ inode_lock(inode);
+ if (block_faults) {
+ filemap_invalidate_lock(inode->i_mapping);
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err)
+ goto out;
+ }
- if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
- loff_t endbyte = offset + length - 1;
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
+ loff_t endbyte = offset + length - 1;
- err = fuse_writeback_range(inode, offset, endbyte);
- if (err)
- goto out;
- }
+ err = fuse_writeback_range(inode, offset, endbyte);
+ if (err)
+ goto out;
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
@@ -3039,8 +3035,7 @@ out:
if (block_faults)
filemap_invalidate_unlock(inode->i_mapping);
- if (lock_inode)
- inode_unlock(inode);
+ inode_unlock(inode);
fuse_flush_time_update(inode);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index c4526f16355d..a0746be3c1de 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -458,6 +458,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
/* panic? */
return -EIO;
+ if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN)
+ return -EIO;
fd.search_key->cat = HFS_I(main_inode)->cat_key;
if (hfs_brec_find(&fd))
/* panic? */
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index 39f5e343bf4d..fdb0edb8a607 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -109,7 +109,7 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
if (nls_io) {
wchar_t ch;
- while (srclen > 0) {
+ while (srclen > 0 && dstlen > 0) {
size = nls_io->char2uni(src, srclen, &ch);
if (size < 0) {
ch = '?';
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a5db2e3b2980..6aa919e59483 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -198,6 +198,8 @@ struct hfsplus_sb_info {
#define HFSPLUS_SB_HFSX 3
#define HFSPLUS_SB_CASEFOLD 4
#define HFSPLUS_SB_NOBARRIER 5
+#define HFSPLUS_SB_UID 6
+#define HFSPLUS_SB_GID 7
static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
{
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index aeab83ed1c9c..b675581aa9d0 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -192,11 +192,11 @@ static void hfsplus_get_perms(struct inode *inode,
mode = be16_to_cpu(perms->mode);
i_uid_write(inode, be32_to_cpu(perms->owner));
- if (!i_uid_read(inode) && !mode)
+ if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode))
inode->i_uid = sbi->uid;
i_gid_write(inode, be32_to_cpu(perms->group));
- if (!i_gid_read(inode) && !mode)
+ if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode))
inode->i_gid = sbi->gid;
if (dir) {
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 047e05c57560..c94a58762ad6 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
if (!uid_valid(sbi->uid)) {
pr_err("invalid uid specified\n");
return 0;
+ } else {
+ set_bit(HFSPLUS_SB_UID, &sbi->flags);
}
break;
case opt_gid:
@@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
if (!gid_valid(sbi->gid)) {
pr_err("invalid gid specified\n");
return 0;
+ } else {
+ set_bit(HFSPLUS_SB_GID, &sbi->flags);
}
break;
case opt_part:
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 8de970d6146f..72e981c9a572 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -321,7 +321,7 @@ static int check_lock_range(struct file *filp, loff_t start, loff_t end,
unsigned char type)
{
struct file_lock *flock;
- struct file_lock_context *ctx = file_inode(filp)->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(file_inode(filp));
int error = 0;
if (!ctx || list_empty_careful(&ctx->flc_posix))
@@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src_fp->filp, src_off,
- dst_fp->filp, dst_off,
- len, 0);
+ ret = vfs_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off, len,
+ COPY_FILE_SPLICE);
if (ret < 0)
return ret;
diff --git a/fs/libfs.c b/fs/libfs.c
index 682d56345a1c..aada4e7c8713 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -995,8 +995,8 @@ out:
EXPORT_SYMBOL_GPL(simple_attr_read);
/* interpret the buffer as a number to call the set function with */
-ssize_t simple_attr_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
+static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos, bool is_signed)
{
struct simple_attr *attr;
unsigned long long val;
@@ -1017,7 +1017,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- ret = kstrtoull(attr->set_buf, 0, &val);
+ if (is_signed)
+ ret = kstrtoll(attr->set_buf, 0, &val);
+ else
+ ret = kstrtoull(attr->set_buf, 0, &val);
if (ret)
goto out;
ret = attr->set(attr->data, val);
@@ -1027,8 +1030,21 @@ out:
mutex_unlock(&attr->mutex);
return ret;
}
+
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return simple_attr_write_xsigned(file, buf, len, ppos, false);
+}
EXPORT_SYMBOL_GPL(simple_attr_write);
+ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return simple_attr_write_xsigned(file, buf, len, ppos, true);
+}
+EXPORT_SYMBOL_GPL(simple_attr_write_signed);
+
/**
* generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
* @sb: filesystem to do the file handle conversion on
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index e1c4617de771..720684345817 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -207,7 +207,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct nlm_host *lockhost;
if (!flctx || list_empty_careful(&flctx->flc_posix))
@@ -262,7 +262,7 @@ nlm_file_inuse(struct nlm_file *file)
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1;
diff --git a/fs/locks.c b/fs/locks.c
index 607f94a0e789..8f01bee17715 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type)
struct file_lock_context *ctx;
/* paired with cmpxchg() below */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (likely(ctx) || type == F_UNLCK)
goto out;
@@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type)
*/
if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
kmem_cache_free(flctx_cache, ctx);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
}
out:
trace_locks_get_lock_context(inode, type, ctx);
@@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
void
locks_free_lock_context(struct inode *inode)
{
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(inode);
if (unlikely(ctx)) {
locks_check_ctx_lists(inode);
@@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
void *owner;
void (*func)(void);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
fl->fl_type = F_UNLCK;
return;
@@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
new_fl->fl_flags = type;
/* typically we will check that ctx is non-NULL before calling */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
WARN_ON_ONCE(1);
goto free_lock;
@@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
struct file_lock_context *ctx;
struct file_lock *fl;
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
fl = list_first_entry_or_null(&ctx->flc_lease,
@@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp)
int type = F_UNLCK;
LIST_HEAD(dispose);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
struct file_lock_context *ctx;
LIST_HEAD(dispose);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
trace_generic_delete_lease(inode, NULL);
return error;
@@ -2096,7 +2096,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
* throw a warning to let people know that they don't actually work.
*/
if (cmd & LOCK_MAND) {
- pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
+ pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid);
return 0;
}
@@ -2146,6 +2146,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
@@ -2295,6 +2296,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, cmd, fl);
else
@@ -2561,7 +2563,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
* posix_lock_file(). Another process could be setting a lock on this
* file at the same time, but we wouldn't remove that lock anyway.
*/
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty(&ctx->flc_posix))
return;
@@ -2634,7 +2636,7 @@ void locks_remove_file(struct file *filp)
{
struct file_lock_context *ctx;
- ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
+ ctx = locks_inode_context(locks_inode(filp));
if (!ctx)
return;
@@ -2663,12 +2665,36 @@ void locks_remove_file(struct file *filp)
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
EXPORT_SYMBOL_GPL(vfs_cancel_lock);
+/**
+ * vfs_inode_has_locks - are any file locks held on @inode?
+ * @inode: inode to check for locks
+ *
+ * Return true if there are any FL_POSIX or FL_FLOCK locks currently
+ * set on @inode.
+ */
+bool vfs_inode_has_locks(struct inode *inode)
+{
+ struct file_lock_context *ctx;
+ bool ret;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return false;
+
+ spin_lock(&ctx->flc_lock);
+ ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
+ spin_unlock(&ctx->flc_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
+
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -2839,7 +2865,7 @@ void show_fd_locks(struct seq_file *f,
struct file_lock_context *ctx;
int id = 0;
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx)
return;
diff --git a/fs/namei.c b/fs/namei.c
index 578c2110df02..9155ecb547ce 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
struct inode *dir = d_inode(parentpath->dentry);
struct inode *inode;
int error;
+ int open_flag = file->f_flags;
/* we want directory to be writable */
error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
@@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
if (error)
return error;
inode = file_inode(file);
- if (!(file->f_flags & O_EXCL)) {
+ if (!(open_flag & O_EXCL)) {
spin_lock(&inode->i_lock);
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index ead8a0e06abf..cf7365581031 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -146,7 +146,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
{
struct inode *inode = state->inode;
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct list_head *list;
int status = 0;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a2d2d5d1b088..dd18344648f3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1501,7 +1501,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
struct file_lock *fl;
struct nfs4_lock_state *lsp;
int status = 0;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct list_head *list;
if (flctx == NULL)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 317cedfa52bf..16be6dae524f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1055,7 +1055,7 @@ static unsigned int nfs_coalesce_size(struct nfs_page *prev,
if (prev) {
if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
return 0;
- flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
+ flctx = locks_inode_context(d_inode(nfs_req_openctx(req)->dentry));
if (flctx != NULL &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock)) &&
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f41d24b54fd1..80c240e50952 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1185,7 +1185,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct nfs_lock_context *l_ctx;
- struct file_lock_context *flctx = file_inode(file)->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(file_inode(file));
struct nfs_page *req;
int do_flush, status;
/*
@@ -1321,7 +1321,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page,
struct inode *inode, unsigned int pagelen)
{
int ret;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct file_lock *fl;
if (file->f_flags & O_DSYNC)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 836bd825ca4a..da8d0ea66229 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4758,7 +4758,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
static bool nfsd4_deleg_present(const struct inode *inode)
{
- struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
+ struct file_lock_context *ctx = locks_inode_context(inode);
return ctx && !list_empty_careful(&ctx->flc_lease);
}
@@ -5897,7 +5897,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
nf = stp->st_stid.sc_file;
- ctx = nf->fi_inode->i_flctx;
+ ctx = locks_inode_context(nf->fi_inode);
if (!ctx)
continue;
if (locks_owner_has_blockers(ctx, lo))
@@ -7713,7 +7713,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
inode = locks_inode(nf->nf_file);
- flctx = inode->i_flctx;
+ flctx = locks_inode_context(inode);
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 51f453baa952..a996cbda3b88 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
- count, 0);
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+ COPY_FILE_SPLICE);
return ret;
}
@@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct svc_rqst *rqstp = sd->u.data;
struct page *page = buf->page; // may be a compound one
unsigned offset = buf->offset;
+ struct page *last_page;
- page += offset / PAGE_SIZE;
- for (int i = sd->len; i > 0; i -= PAGE_SIZE)
- svc_rqst_replace_page(rqstp, page++);
+ last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
+ for (page += offset / PAGE_SIZE; page <= last_page; page++)
+ svc_rqst_replace_page(rqstp, page);
if (rqstp->rq_res.page_len == 0) // first call
rqstp->rq_res.page_base = offset % PAGE_SIZE;
rqstp->rq_res.page_len += sd->len;
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 3b55e239705f..9930fa901039 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
+
+ if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
+ nilfs_error(dat->i_sb,
+ "state inconsistency probably due to duplicate use of vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ return;
+ }
nilfs_palloc_commit_free_entry(dat, req);
}
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 77ff8e95421f..dc359b56fdfa 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
+ void *kaddr;
+ struct nilfs_segment_usage *su;
int ret;
+ down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
if (!ret) {
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
+ kaddr = kmap_atomic(bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ nilfs_segment_usage_set_dirty(su);
+ kunmap_atomic(kaddr);
brelse(bh);
}
+ up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index c8b89b4f94e0..2064e6473d30 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/random.h>
+#include <linux/log2.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
@@ -193,6 +194,34 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
}
/**
+ * nilfs_get_blocksize - get block size from raw superblock data
+ * @sb: super block instance
+ * @sbp: superblock raw data buffer
+ * @blocksize: place to store block size
+ *
+ * nilfs_get_blocksize() calculates the block size from the block size
+ * exponent information written in @sbp and stores it in @blocksize,
+ * or aborts with an error message if it's too large.
+ *
+ * Return Value: On success, 0 is returned. If the block size is too
+ * large, -EINVAL is returned.
+ */
+static int nilfs_get_blocksize(struct super_block *sb,
+ struct nilfs_super_block *sbp, int *blocksize)
+{
+ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
+
+ if (unlikely(shift_bits >
+ ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)) {
+ nilfs_err(sb, "too large filesystem blocksize: 2 ^ %u KiB",
+ shift_bits);
+ return -EINVAL;
+ }
+ *blocksize = BLOCK_SIZE << shift_bits;
+ return 0;
+}
+
+/**
* load_nilfs - load and recover the nilfs
* @nilfs: the_nilfs structure to be released
* @sb: super block instance used to recover past segment
@@ -245,11 +274,15 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
/* verify consistency between two super blocks */
- blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size);
+ err = nilfs_get_blocksize(sb, sbp[0], &blocksize);
+ if (err)
+ goto scan_error;
+
if (blocksize != nilfs->ns_blocksize) {
nilfs_warn(sb,
"blocksize differs between two super blocks (%d != %d)",
blocksize, nilfs->ns_blocksize);
+ err = -EINVAL;
goto scan_error;
}
@@ -443,11 +476,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
return crc == le32_to_cpu(sbp->s_sum);
}
-static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
+/**
+ * nilfs_sb2_bad_offset - check the location of the second superblock
+ * @sbp: superblock raw data buffer
+ * @offset: byte offset of second superblock calculated from device size
+ *
+ * nilfs_sb2_bad_offset() checks if the position on the second
+ * superblock is valid or not based on the filesystem parameters
+ * stored in @sbp. If @offset points to a location within the segment
+ * area, or if the parameters themselves are not normal, it is
+ * determined to be invalid.
+ *
+ * Return Value: true if invalid, false if valid.
+ */
+static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
{
- return offset < ((le64_to_cpu(sbp->s_nsegments) *
- le32_to_cpu(sbp->s_blocks_per_segment)) <<
- (le32_to_cpu(sbp->s_log_block_size) + 10));
+ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
+ u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
+ u64 nsegments = le64_to_cpu(sbp->s_nsegments);
+ u64 index;
+
+ if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS ||
+ shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)
+ return true;
+
+ index = offset >> (shift_bits + BLOCK_SIZE_BITS);
+ do_div(index, blocks_per_segment);
+ return index < nsegments;
}
static void nilfs_release_super_block(struct the_nilfs *nilfs)
@@ -586,9 +641,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
- blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
- if (blocksize < NILFS_MIN_BLOCK_SIZE ||
- blocksize > NILFS_MAX_BLOCK_SIZE) {
+ err = nilfs_get_blocksize(sb, sbp, &blocksize);
+ if (err)
+ goto failed_sbh;
+
+ if (blocksize < NILFS_MIN_BLOCK_SIZE) {
nilfs_err(sb,
"couldn't mount because of unsupported filesystem blocksize %d",
blocksize);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b13d344d40b6..60b97c92e2b2 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -335,7 +335,7 @@ static void o2hb_arm_timeout(struct o2hb_region *reg)
/* negotiate timeout must be less than write timeout. */
schedule_delayed_work(&reg->hr_nego_timeout_work,
msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS));
- memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap));
+ bitmap_zero(reg->hr_nego_node_bitmap, O2NM_MAX_NODES);
}
static void o2hb_disarm_timeout(struct o2hb_region *reg)
@@ -375,7 +375,7 @@ static void o2hb_nego_timeout(struct work_struct *work)
if (reg->hr_last_hb_status)
return;
- o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
+ o2hb_fill_node_map(live_node_bitmap, O2NM_MAX_NODES);
/* lowest node as master node to make negotiate decision. */
master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES);
@@ -386,8 +386,8 @@ static void o2hb_nego_timeout(struct work_struct *work)
config_item_name(&reg->hr_item), reg->hr_bdev);
set_bit(master_node, reg->hr_nego_node_bitmap);
}
- if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap,
- sizeof(reg->hr_nego_node_bitmap))) {
+ if (!bitmap_equal(reg->hr_nego_node_bitmap, live_node_bitmap,
+ O2NM_MAX_NODES)) {
/* check negotiate bitmap every second to do timeout
* approve decision.
*/
@@ -856,8 +856,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg)
* live nodes heartbeat on it. In other words, the region has been
* added to all nodes.
*/
- if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
- sizeof(o2hb_live_node_bitmap)))
+ if (!bitmap_equal(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
+ O2NM_MAX_NODES))
goto unlock;
printk(KERN_NOTICE "o2hb: Region %s (%pg) is now a quorum device\n",
@@ -1087,7 +1087,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
* If a node is not configured but is in the livemap, we still need
* to read the slot so as to be able to remove it from the livemap.
*/
- o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
+ o2hb_fill_node_map(live_node_bitmap, O2NM_MAX_NODES);
i = -1;
while ((i = find_next_bit(live_node_bitmap,
O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
@@ -1437,11 +1437,11 @@ void o2hb_init(void)
for (i = 0; i < ARRAY_SIZE(o2hb_live_slots); i++)
INIT_LIST_HEAD(&o2hb_live_slots[i]);
- memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
- memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
- memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));
- memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
- memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
+ bitmap_zero(o2hb_live_node_bitmap, O2NM_MAX_NODES);
+ bitmap_zero(o2hb_region_bitmap, O2NM_MAX_REGIONS);
+ bitmap_zero(o2hb_live_region_bitmap, O2NM_MAX_REGIONS);
+ bitmap_zero(o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS);
+ bitmap_zero(o2hb_failed_region_bitmap, O2NM_MAX_REGIONS);
o2hb_dependent_users = 0;
@@ -1450,23 +1450,21 @@ void o2hb_init(void)
/* if we're already in a callback then we're already serialized by the sem */
static void o2hb_fill_node_map_from_callback(unsigned long *map,
- unsigned bytes)
+ unsigned int bits)
{
- BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long)));
-
- memcpy(map, &o2hb_live_node_bitmap, bytes);
+ bitmap_copy(map, o2hb_live_node_bitmap, bits);
}
/*
* get a map of all nodes that are heartbeating in any regions
*/
-void o2hb_fill_node_map(unsigned long *map, unsigned bytes)
+void o2hb_fill_node_map(unsigned long *map, unsigned int bits)
{
/* callers want to serialize this map and callbacks so that they
* can trust that they don't miss nodes coming to the party */
down_read(&o2hb_callback_sem);
spin_lock(&o2hb_live_lock);
- o2hb_fill_node_map_from_callback(map, bytes);
+ o2hb_fill_node_map_from_callback(map, bits);
spin_unlock(&o2hb_live_lock);
up_read(&o2hb_callback_sem);
}
@@ -2460,7 +2458,7 @@ int o2hb_check_node_heartbeating_no_sem(u8 node_num)
unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
spin_lock(&o2hb_live_lock);
- o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
+ o2hb_fill_node_map_from_callback(testing_map, O2NM_MAX_NODES);
spin_unlock(&o2hb_live_lock);
if (!test_bit(node_num, testing_map)) {
mlog(ML_HEARTBEAT,
@@ -2477,7 +2475,7 @@ int o2hb_check_node_heartbeating_from_callback(u8 node_num)
{
unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
- o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
+ o2hb_fill_node_map_from_callback(testing_map, O2NM_MAX_NODES);
if (!test_bit(node_num, testing_map)) {
mlog(ML_HEARTBEAT,
"node (%u) does not have heartbeating enabled.\n",
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index 1d4100abf6f8..8ef8c1b9eeb7 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -59,7 +59,7 @@ int o2hb_register_callback(const char *region_uuid,
void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc);
void o2hb_fill_node_map(unsigned long *map,
- unsigned bytes);
+ unsigned int bits);
void o2hb_exit(void);
void o2hb_init(void);
int o2hb_check_node_heartbeating_no_sem(u8 node_num);
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 7524994e3199..35c05c18de59 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -438,7 +438,7 @@ static int o2net_fill_bitmap(char *buf, int len)
unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
int i = -1, out = 0;
- o2net_fill_node_map(map, sizeof(map));
+ o2net_fill_node_map(map, O2NM_MAX_NODES);
while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 27fee68f860a..2f61d39e4e50 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -54,7 +54,7 @@ int o2nm_configured_node_map(unsigned long *map, unsigned bytes)
return -EINVAL;
read_lock(&cluster->cl_nodes_lock);
- memcpy(map, cluster->cl_nodes_bitmap, sizeof(cluster->cl_nodes_bitmap));
+ bitmap_copy(map, cluster->cl_nodes_bitmap, O2NM_MAX_NODES);
read_unlock(&cluster->cl_nodes_lock);
return 0;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 785cabd71d67..37d222bdfc8c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -990,14 +990,12 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
}
/* Get a map of all nodes to which this node is currently connected to */
-void o2net_fill_node_map(unsigned long *map, unsigned bytes)
+void o2net_fill_node_map(unsigned long *map, unsigned int bits)
{
struct o2net_sock_container *sc;
int node, ret;
- BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long)));
-
- memset(map, 0, bytes);
+ bitmap_zero(map, bits);
for (node = 0; node < O2NM_MAX_NODES; ++node) {
if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret))
continue;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index fd2022712167..20f790a47484 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1094,7 +1094,7 @@ static inline enum dlm_status dlm_err_to_dlm_status(int err)
static inline void dlm_node_iter_init(unsigned long *map,
struct dlm_node_iter *iter)
{
- memcpy(iter->node_map, map, sizeof(iter->node_map));
+ bitmap_copy(iter->node_map, map, O2NM_MAX_NODES);
iter->curnode = -1;
}
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index c4eccd499db8..5c04dde99981 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1576,8 +1576,8 @@ static int dlm_should_restart_join(struct dlm_ctxt *dlm,
spin_lock(&dlm->spinlock);
/* For now, we restart the process if the node maps have
* changed at all */
- ret = memcmp(ctxt->live_map, dlm->live_nodes_map,
- sizeof(dlm->live_nodes_map));
+ ret = !bitmap_equal(ctxt->live_map, dlm->live_nodes_map,
+ O2NM_MAX_NODES);
spin_unlock(&dlm->spinlock);
if (ret)
@@ -1604,13 +1604,11 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
/* group sem locking should work for us here -- we're already
* registered for heartbeat events so filling this should be
* atomic wrt getting those handlers called. */
- o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map));
+ o2hb_fill_node_map(dlm->live_nodes_map, O2NM_MAX_NODES);
spin_lock(&dlm->spinlock);
- memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map));
-
+ bitmap_copy(ctxt->live_map, dlm->live_nodes_map, O2NM_MAX_NODES);
__dlm_set_joining_node(dlm, dlm->node_num);
-
spin_unlock(&dlm->spinlock);
node = -1;
@@ -1643,8 +1641,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
* yes_resp_map. Copy that into our domain map and send a join
* assert message to clean up everyone elses state. */
spin_lock(&dlm->spinlock);
- memcpy(dlm->domain_map, ctxt->yes_resp_map,
- sizeof(ctxt->yes_resp_map));
+ bitmap_copy(dlm->domain_map, ctxt->yes_resp_map, O2NM_MAX_NODES);
set_bit(dlm->node_num, dlm->domain_map);
spin_unlock(&dlm->spinlock);
@@ -2009,9 +2006,9 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
mlog(0, "dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n",
dlm->recovery_map, &(dlm->recovery_map[0]));
- memset(dlm->recovery_map, 0, sizeof(dlm->recovery_map));
- memset(dlm->live_nodes_map, 0, sizeof(dlm->live_nodes_map));
- memset(dlm->domain_map, 0, sizeof(dlm->domain_map));
+ bitmap_zero(dlm->recovery_map, O2NM_MAX_NODES);
+ bitmap_zero(dlm->live_nodes_map, O2NM_MAX_NODES);
+ bitmap_zero(dlm->domain_map, O2NM_MAX_NODES);
dlm->dlm_thread_task = NULL;
dlm->dlm_reco_thread_task = NULL;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 227da5b1b6ab..d610da8e2f24 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -258,12 +258,12 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
mle->type = type;
INIT_HLIST_NODE(&mle->master_hash_node);
INIT_LIST_HEAD(&mle->hb_events);
- memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
+ bitmap_zero(mle->maybe_map, O2NM_MAX_NODES);
spin_lock_init(&mle->spinlock);
init_waitqueue_head(&mle->wq);
atomic_set(&mle->woken, 0);
kref_init(&mle->mle_refs);
- memset(mle->response_map, 0, sizeof(mle->response_map));
+ bitmap_zero(mle->response_map, O2NM_MAX_NODES);
mle->master = O2NM_MAX_NODES;
mle->new_master = O2NM_MAX_NODES;
mle->inuse = 0;
@@ -290,8 +290,8 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
atomic_inc(&dlm->mle_cur_count[mle->type]);
/* copy off the node_map and register hb callbacks on our copy */
- memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
- memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
+ bitmap_copy(mle->node_map, dlm->domain_map, O2NM_MAX_NODES);
+ bitmap_copy(mle->vote_map, dlm->domain_map, O2NM_MAX_NODES);
clear_bit(dlm->node_num, mle->vote_map);
clear_bit(dlm->node_num, mle->node_map);
@@ -572,7 +572,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
spin_unlock(&dlm->track_lock);
memset(res->lvb, 0, DLM_LVB_LEN);
- memset(res->refmap, 0, sizeof(res->refmap));
+ bitmap_zero(res->refmap, O2NM_MAX_NODES);
}
struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
@@ -1036,10 +1036,10 @@ recheck:
spin_lock(&mle->spinlock);
m = mle->master;
- map_changed = (memcmp(mle->vote_map, mle->node_map,
- sizeof(mle->vote_map)) != 0);
- voting_done = (memcmp(mle->vote_map, mle->response_map,
- sizeof(mle->vote_map)) == 0);
+ map_changed = !bitmap_equal(mle->vote_map, mle->node_map,
+ O2NM_MAX_NODES);
+ voting_done = bitmap_equal(mle->vote_map, mle->response_map,
+ O2NM_MAX_NODES);
/* restart if we hit any errors */
if (map_changed) {
@@ -1277,11 +1277,11 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
/* now blank out everything, as if we had never
* contacted anyone */
- memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
- memset(mle->response_map, 0, sizeof(mle->response_map));
+ bitmap_zero(mle->maybe_map, O2NM_MAX_NODES);
+ bitmap_zero(mle->response_map, O2NM_MAX_NODES);
/* reset the vote_map to the current node_map */
- memcpy(mle->vote_map, mle->node_map,
- sizeof(mle->node_map));
+ bitmap_copy(mle->vote_map, mle->node_map,
+ O2NM_MAX_NODES);
/* put myself into the maybe map */
if (mle->type != DLM_MLE_BLOCK)
set_bit(dlm->node_num, mle->maybe_map);
@@ -2094,7 +2094,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
flags = item->u.am.flags;
spin_lock(&dlm->spinlock);
- memcpy(nodemap, dlm->domain_map, sizeof(nodemap));
+ bitmap_copy(nodemap, dlm->domain_map, O2NM_MAX_NODES);
spin_unlock(&dlm->spinlock);
clear_bit(dlm->node_num, nodemap);
@@ -3447,7 +3447,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
ret = 0;
}
- memset(iter.node_map, 0, sizeof(iter.node_map));
+ bitmap_zero(iter.node_map, O2NM_MAX_NODES);
set_bit(old_master, iter.node_map);
mlog(0, "doing assert master of %.*s back to %u\n",
res->lockname.len, res->lockname.name, old_master);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 52ad342fec3e..50da8af988c1 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -733,7 +733,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
struct dlm_reco_node_data *ndata;
spin_lock(&dlm->spinlock);
- memcpy(dlm->reco.node_map, dlm->domain_map, sizeof(dlm->domain_map));
+ bitmap_copy(dlm->reco.node_map, dlm->domain_map, O2NM_MAX_NODES);
/* nodes can only be removed (by dying) after dropping
* this lock, and death will be trapped later, so this should do */
spin_unlock(&dlm->spinlock);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 126671e6caed..3fb98b4569a2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -157,7 +157,7 @@ static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
replay_map->rm_state = REPLAY_DONE;
}
-static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
+void ocfs2_free_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 969d0aa28718..41c382f68529 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -150,6 +150,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
+void ocfs2_free_replay_slots(struct ocfs2_super *osb);
/*
* Journal Control:
* Initialize, Load, Shutdown, Wipe a journal.
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 740b64238312..a503c553bab2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -560,8 +560,7 @@ static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
u32 nlink = le16_to_cpu(di->i_links_count);
u32 hi = le16_to_cpu(di->i_links_count_hi);
- if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL))
- nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
+ nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
return nlink;
}
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 88f75f7f02d7..c973c03f6fd8 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -273,17 +273,17 @@ static int o2cb_cluster_check(void)
*/
#define O2CB_MAP_STABILIZE_COUNT 60
for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) {
- o2hb_fill_node_map(hbmap, sizeof(hbmap));
+ o2hb_fill_node_map(hbmap, O2NM_MAX_NODES);
if (!test_bit(node_num, hbmap)) {
printk(KERN_ERR "o2cb: %s heartbeat has not been "
"started.\n", (o2hb_global_heartbeat_active() ?
"Global" : "Local"));
return -EINVAL;
}
- o2net_fill_node_map(netmap, sizeof(netmap));
+ o2net_fill_node_map(netmap, O2NM_MAX_NODES);
/* Force set the current node to allow easy compare */
set_bit(node_num, netmap);
- if (!memcmp(hbmap, netmap, sizeof(hbmap)))
+ if (bitmap_equal(hbmap, netmap, O2NM_MAX_NODES))
return 0;
if (i < O2CB_MAP_STABILIZE_COUNT - 1)
msleep(1000);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 317126261523..a8d5ca98fa57 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -669,6 +669,8 @@ static struct ctl_table_header *ocfs2_table_header;
static int __init ocfs2_stack_glue_init(void)
{
+ int ret;
+
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table);
@@ -678,7 +680,11 @@ static int __init ocfs2_stack_glue_init(void)
return -ENOMEM; /* or something. */
}
- return ocfs2_sysfs_init();
+ ret = ocfs2_sysfs_init();
+ if (ret)
+ unregister_sysctl_table(ocfs2_table_header);
+
+ return ret;
}
static void __exit ocfs2_stack_glue_exit(void)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 42c993e53924..0b0e6a132101 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1159,6 +1159,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
out_dismount:
atomic_set(&osb->vol_state, VOLUME_DISABLED);
wake_up(&osb->osb_mount_event);
+ ocfs2_free_replay_slots(osb);
ocfs2_dismount_volume(sb, 1);
goto out;
@@ -1822,12 +1823,14 @@ static int ocfs2_mount_volume(struct super_block *sb)
status = ocfs2_truncate_log_init(osb);
if (status < 0) {
mlog_errno(status);
- goto out_system_inodes;
+ goto out_check_volume;
}
ocfs2_super_unlock(osb, 1);
return 0;
+out_check_volume:
+ ocfs2_free_replay_slots(osb);
out_system_inodes:
if (osb->local_alloc_state == OCFS2_LA_ENABLED)
ocfs2_shutdown_local_alloc(osb);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index fa762c5fbcb2..91fe1597af7b 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include "internal.h"
static int cmdline_proc_show(struct seq_file *m, void *v)
{
@@ -13,7 +14,10 @@ static int cmdline_proc_show(struct seq_file *m, void *v)
static int __init proc_cmdline_init(void)
{
- proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
+ pde->size = saved_command_line_len + 1;
return 0;
}
fs_initcall(proc_cmdline_init);
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index dfe6ce3505ce..e0758fe7936d 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -33,7 +33,16 @@ static int show_console_dev(struct seq_file *m, void *v)
if (con->device) {
const struct tty_driver *driver;
int index;
+
+ /*
+ * Take console_lock to serialize device() callback with
+ * other console operations. For example, fg_console is
+ * modified under console_lock when switching vt.
+ */
+ console_lock();
driver = con->device(con, &index);
+ console_unlock();
+
if (driver) {
dev = MKDEV(driver->major, driver->minor_start);
dev += index;
@@ -63,7 +72,12 @@ static void *c_start(struct seq_file *m, loff_t *pos)
struct console *con;
loff_t off = 0;
- console_lock();
+ /*
+ * Hold the console_list_lock to guarantee safe traversal of the
+ * console list. SRCU cannot be used because there is no
+ * place to store the SRCU cookie.
+ */
+ console_list_lock();
for_each_console(con)
if (off++ == *pos)
break;
@@ -74,13 +88,14 @@ static void *c_start(struct seq_file *m, loff_t *pos)
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
struct console *con = v;
+
++*pos;
- return con->next;
+ return hlist_entry_safe(con->node.next, struct console, node);
}
static void c_stop(struct seq_file *m, void *v)
{
- console_unlock();
+ console_list_unlock();
}
static const struct seq_operations consoles_op = {
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 913bef0d2a36..fc46d6fe080c 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -7,6 +7,7 @@
#include <linux/namei.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
+#include <linux/bitmap.h>
#include <linux/security.h>
#include <linux/file.h>
#include <linux/seq_file.h>
@@ -279,6 +280,30 @@ out:
return 0;
}
+static int proc_readfd_count(struct inode *inode, loff_t *count)
+{
+ struct task_struct *p = get_proc_task(inode);
+ struct fdtable *fdt;
+
+ if (!p)
+ return -ENOENT;
+
+ task_lock(p);
+ if (p->files) {
+ rcu_read_lock();
+
+ fdt = files_fdtable(p->files);
+ *count = bitmap_weight(fdt->open_fds, fdt->max_fds);
+
+ rcu_read_unlock();
+ }
+ task_unlock(p);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
static int proc_readfd(struct file *file, struct dir_context *ctx)
{
return proc_readfd_common(file, ctx, proc_fd_instantiate);
@@ -319,9 +344,29 @@ int proc_fd_permission(struct user_namespace *mnt_userns,
return rv;
}
+static int proc_fd_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ int rv = 0;
+
+ generic_fillattr(&init_user_ns, inode, stat);
+
+ /* If it's a directory, put the number of open fds there */
+ if (S_ISDIR(inode->i_mode)) {
+ rv = proc_readfd_count(inode, &stat->size);
+ if (rv < 0)
+ return rv;
+ }
+
+ return rv;
+}
+
const struct inode_operations proc_fd_inode_operations = {
.lookup = proc_lookupfd,
.permission = proc_fd_permission,
+ .getattr = proc_fd_getattr,
.setattr = proc_setattr,
};
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5101131e6047..440960110a42 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
show_val_kb(m, "PageTables: ",
global_node_page_state(NR_PAGETABLE));
- show_val_kb(m, "SecPageTables: ",
+ show_val_kb(m, "SecPageTables: ",
global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5aa527ca6dbe..09a81e4b1273 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1567,6 +1567,7 @@ static int __init vmcore_init(void)
return rc;
rc = parse_crash_elf_headers();
if (rc) {
+ elfcorehdr_free(elfcorehdr_addr);
pr_warn("Kdump: vmcore not initialized\n");
return rc;
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 0c034ea39954..cbc0b468c1ab 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -89,6 +89,11 @@ static char *compress =
module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
+/* How much of the kernel log to snapshot */
+unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
+module_param(kmsg_bytes, ulong, 0444);
+MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
+
/* Compression parameters */
static struct crypto_comp *tfm;
@@ -100,9 +105,6 @@ struct pstore_zbackend {
static char *big_oops_buf;
static size_t big_oops_buf_sz;
-/* How much of the console log to snapshot */
-unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
-
void pstore_set_kmsg_bytes(int bytes)
{
kmsg_bytes = bytes;
@@ -391,6 +393,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
const char *why;
unsigned int part = 1;
unsigned long flags = 0;
+ int saved_ret = 0;
int ret;
why = kmsg_dump_reason_str(reason);
@@ -461,12 +464,21 @@ static void pstore_dump(struct kmsg_dumper *dumper,
if (ret == 0 && reason == KMSG_DUMP_OOPS) {
pstore_new_entry = 1;
pstore_timer_kick();
+ } else {
+ /* Preserve only the first non-zero returned value. */
+ if (!saved_ret)
+ saved_ret = ret;
}
total += record.size;
part++;
}
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+
+ if (saved_ret) {
+ pr_err_once("backend (%s) writing error (%d)\n", psinfo->name,
+ saved_ret);
+ }
}
static struct kmsg_dumper pstore_dumper = {
@@ -562,8 +574,9 @@ out:
int pstore_register(struct pstore_info *psi)
{
if (backend && strcmp(backend, psi->name)) {
- pr_warn("ignoring unexpected backend '%s'\n", psi->name);
- return -EPERM;
+ pr_warn("backend '%s' already in use: ignoring '%s'\n",
+ backend, psi->name);
+ return -EBUSY;
}
/* Sanity check flags. */
@@ -662,6 +675,8 @@ void pstore_unregister(struct pstore_info *psi)
psinfo = NULL;
kfree(backend);
backend = NULL;
+
+ pr_info("Unregistered %s as persistent store backend\n", psi->name);
mutex_unlock(&psinfo_lock);
}
EXPORT_SYMBOL_GPL(pstore_unregister);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index fefe3d391d3a..9a5052431fd3 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -18,10 +18,11 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/compiler.h>
-#include <linux/pstore_ram.h>
#include <linux/of.h>
#include <linux/of_address.h>
+
#include "internal.h"
+#include "ram_internal.h"
#define RAMOOPS_KERNMSG_HDR "===="
#define MIN_MEM_SIZE 4096UL
@@ -451,20 +452,28 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
{
int i;
+ /* Free pmsg PRZ */
+ persistent_ram_free(&cxt->mprz);
+
+ /* Free console PRZ */
+ persistent_ram_free(&cxt->cprz);
+
/* Free dump PRZs */
if (cxt->dprzs) {
for (i = 0; i < cxt->max_dump_cnt; i++)
- persistent_ram_free(cxt->dprzs[i]);
+ persistent_ram_free(&cxt->dprzs[i]);
kfree(cxt->dprzs);
+ cxt->dprzs = NULL;
cxt->max_dump_cnt = 0;
}
/* Free ftrace PRZs */
if (cxt->fprzs) {
for (i = 0; i < cxt->max_ftrace_cnt; i++)
- persistent_ram_free(cxt->fprzs[i]);
+ persistent_ram_free(&cxt->fprzs[i]);
kfree(cxt->fprzs);
+ cxt->fprzs = NULL;
cxt->max_ftrace_cnt = 0;
}
}
@@ -548,9 +557,10 @@ static int ramoops_init_przs(const char *name,
while (i > 0) {
i--;
- persistent_ram_free(prz_ar[i]);
+ persistent_ram_free(&prz_ar[i]);
}
kfree(prz_ar);
+ prz_ar = NULL;
goto fail;
}
*paddr += zone_sz;
@@ -735,6 +745,7 @@ static int ramoops_probe(struct platform_device *pdev)
/* Make sure we didn't get bogus platform data pointer. */
if (!pdata) {
pr_err("NULL platform data\n");
+ err = -EINVAL;
goto fail_out;
}
@@ -742,6 +753,7 @@ static int ramoops_probe(struct platform_device *pdev)
!pdata->ftrace_size && !pdata->pmsg_size)) {
pr_err("The memory size and the record/console size must be "
"non-zero\n");
+ err = -EINVAL;
goto fail_out;
}
@@ -772,12 +784,17 @@ static int ramoops_probe(struct platform_device *pdev)
dump_mem_sz, cxt->record_size,
&cxt->max_dump_cnt, 0, 0);
if (err)
- goto fail_out;
+ goto fail_init;
err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr,
cxt->console_size, 0);
if (err)
- goto fail_init_cprz;
+ goto fail_init;
+
+ err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr,
+ cxt->pmsg_size, 0);
+ if (err)
+ goto fail_init;
cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
? nr_cpu_ids
@@ -788,12 +805,7 @@ static int ramoops_probe(struct platform_device *pdev)
(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
? PRZ_FLAG_NO_LOCK : 0);
if (err)
- goto fail_init_fprz;
-
- err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr,
- cxt->pmsg_size, 0);
- if (err)
- goto fail_init_mprz;
+ goto fail_init;
cxt->pstore.data = cxt;
/*
@@ -857,11 +869,7 @@ fail_buf:
kfree(cxt->pstore.buf);
fail_clear:
cxt->pstore.bufsize = 0;
- persistent_ram_free(cxt->mprz);
-fail_init_mprz:
-fail_init_fprz:
- persistent_ram_free(cxt->cprz);
-fail_init_cprz:
+fail_init:
ramoops_free_przs(cxt);
fail_out:
return err;
@@ -876,8 +884,6 @@ static int ramoops_remove(struct platform_device *pdev)
kfree(cxt->pstore.buf);
cxt->pstore.bufsize = 0;
- persistent_ram_free(cxt->mprz);
- persistent_ram_free(cxt->cprz);
ramoops_free_przs(cxt);
return 0;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index a89e33719fcf..966191d3a5ba 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -13,13 +13,14 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/memblock.h>
-#include <linux/pstore_ram.h>
#include <linux/rslib.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
+#include "ram_internal.h"
+
/**
* struct persistent_ram_buffer - persistent circular RAM buffer
*
@@ -439,7 +440,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size,
phys_addr_t addr = page_start + i * PAGE_SIZE;
pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
}
- vaddr = vmap(pages, page_count, VM_MAP, prot);
+ /*
+ * VM_IOREMAP used here to bypass this region during vread()
+ * and kmap_atomic() (i.e. kcore) to avoid __va() failures.
+ */
+ vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot);
kfree(pages);
/*
@@ -543,8 +548,14 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
return 0;
}
-void persistent_ram_free(struct persistent_ram_zone *prz)
+void persistent_ram_free(struct persistent_ram_zone **_prz)
{
+ struct persistent_ram_zone *prz;
+
+ if (!_prz)
+ return;
+
+ prz = *_prz;
if (!prz)
return;
@@ -568,6 +579,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
persistent_ram_free_old(prz);
kfree(prz->label);
kfree(prz);
+ *_prz = NULL;
}
struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
@@ -604,6 +616,6 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
return prz;
err:
- persistent_ram_free(prz);
+ persistent_ram_free(&prz);
return ERR_PTR(ret);
}
diff --git a/fs/pstore/ram_internal.h b/fs/pstore/ram_internal.h
new file mode 100644
index 000000000000..5f694698351f
--- /dev/null
+++ b/fs/pstore/ram_internal.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com>
+ * Copyright (C) 2011 Kees Cook <keescook@chromium.org>
+ * Copyright (C) 2011 Google, Inc.
+ */
+
+#include <linux/pstore_ram.h>
+
+/*
+ * Choose whether access to the RAM zone requires locking or not. If a zone
+ * can be written to from different CPUs like with ftrace for example, then
+ * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required.
+ */
+#define PRZ_FLAG_NO_LOCK BIT(0)
+/*
+ * If a PRZ should only have a single-boot lifetime, this marks it as
+ * getting wiped after its contents get copied out after boot.
+ */
+#define PRZ_FLAG_ZAP_OLD BIT(1)
+
+/**
+ * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ)
+ * used as a pstore backend
+ *
+ * @paddr: physical address of the mapped RAM area
+ * @size: size of mapping
+ * @label: unique name of this PRZ
+ * @type: frontend type for this PRZ
+ * @flags: holds PRZ_FLAG_* bits
+ *
+ * @buffer_lock:
+ * locks access to @buffer "size" bytes and "start" offset
+ * @buffer:
+ * pointer to actual RAM area managed by this PRZ
+ * @buffer_size:
+ * bytes in @buffer->data (not including any trailing ECC bytes)
+ *
+ * @par_buffer:
+ * pointer into @buffer->data containing ECC bytes for @buffer->data
+ * @par_header:
+ * pointer into @buffer->data containing ECC bytes for @buffer header
+ * (i.e. all fields up to @data)
+ * @rs_decoder:
+ * RSLIB instance for doing ECC calculations
+ * @corrected_bytes:
+ * ECC corrected bytes accounting since boot
+ * @bad_blocks:
+ * ECC uncorrectable bytes accounting since boot
+ * @ecc_info:
+ * ECC configuration details
+ *
+ * @old_log:
+ * saved copy of @buffer->data prior to most recent wipe
+ * @old_log_size:
+ * bytes contained in @old_log
+ *
+ */
+struct persistent_ram_zone {
+ phys_addr_t paddr;
+ size_t size;
+ void *vaddr;
+ char *label;
+ enum pstore_type_id type;
+ u32 flags;
+
+ raw_spinlock_t buffer_lock;
+ struct persistent_ram_buffer *buffer;
+ size_t buffer_size;
+
+ char *par_buffer;
+ char *par_header;
+ struct rs_control *rs_decoder;
+ int corrected_bytes;
+ int bad_blocks;
+ struct persistent_ram_ecc_info ecc_info;
+
+ char *old_log;
+ size_t old_log_size;
+};
+
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+ u32 sig, struct persistent_ram_ecc_info *ecc_info,
+ unsigned int memtype, u32 flags, char *label);
+void persistent_ram_free(struct persistent_ram_zone **_prz);
+void persistent_ram_zap(struct persistent_ram_zone *prz);
+
+int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
+ unsigned int count);
+int persistent_ram_write_user(struct persistent_ram_zone *prz,
+ const void __user *s, unsigned int count);
+
+void persistent_ram_save_old(struct persistent_ram_zone *prz);
+size_t persistent_ram_old_size(struct persistent_ram_zone *prz);
+void *persistent_ram_old(struct persistent_ram_zone *prz);
+void persistent_ram_free_old(struct persistent_ram_zone *prz);
+ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
+ char *str, size_t len);
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 017d0d4ad329..2770746bb7aa 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -761,7 +761,7 @@ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt,
/* avoid destroying old data, allocate a new one */
len = zone->buffer_size + sizeof(*zone->buffer);
zone->oldbuf = zone->buffer;
- zone->buffer = kzalloc(len, GFP_KERNEL);
+ zone->buffer = kzalloc(len, GFP_ATOMIC);
if (!zone->buffer) {
zone->buffer = zone->oldbuf;
return -ENOMEM;
diff --git a/fs/read_write.c b/fs/read_write.c
index 37c2f28b51e8..7a2ff6157eda 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
+ lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
+
return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
}
@@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
* and several different sets of file_operations, but they all end up
* using the same ->copy_file_range() function pointer.
*/
- if (file_out->f_op->copy_file_range) {
+ if (flags & COPY_FILE_SPLICE) {
+ /* cross sb splice is allowed */
+ } else if (file_out->f_op->copy_file_range) {
if (file_in->f_op->copy_file_range !=
file_out->f_op->copy_file_range)
return -EXDEV;
@@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
size_t len, unsigned int flags)
{
ssize_t ret;
+ bool splice = flags & COPY_FILE_SPLICE;
- if (flags != 0)
+ if (flags & ~COPY_FILE_SPLICE)
return -EINVAL;
ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
@@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* same sb using clone, but for filesystems where both clone and copy
* are supported (e.g. nfs,cifs), we only call the copy method.
*/
- if (file_out->f_op->copy_file_range) {
+ if (!splice && file_out->f_op->copy_file_range) {
ret = file_out->f_op->copy_file_range(file_in, pos_in,
file_out, pos_out,
len, flags);
goto done;
}
- if (file_in->f_op->remap_file_range &&
+ if (!splice && file_in->f_op->remap_file_range &&
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
@@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* consistent story about which filesystems support copy_file_range()
* and which filesystems do not, that will allow userspace tools to
* make consistent desicions w.r.t using copy_file_range().
+ *
+ * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
*/
ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
flags);
@@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
pos_out = f_out.file->f_pos;
}
+ ret = -EINVAL;
+ if (flags != 0)
+ goto out;
+
ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
flags);
if (ret > 0) {
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 916e78fabcaa..60fc98bdf421 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -54,9 +54,35 @@ config SQUASHFS_FILE_DIRECT
endchoice
+config SQUASHFS_DECOMP_SINGLE
+ depends on SQUASHFS
+ def_bool n
+
+config SQUASHFS_DECOMP_MULTI
+ depends on SQUASHFS
+ def_bool n
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+ depends on SQUASHFS
+ def_bool n
+
+config SQUASHFS_CHOICE_DECOMP_BY_MOUNT
+ bool "Select the parallel decompression mode during mount"
+ depends on SQUASHFS
+ default n
+ select SQUASHFS_DECOMP_SINGLE
+ select SQUASHFS_DECOMP_MULTI
+ select SQUASHFS_DECOMP_MULTI_PERCPU
+ select SQUASHFS_MOUNT_DECOMP_THREADS
+ help
+ Compile all parallel decompression modes and specify the
+ decompression mode by setting "threads=" during mount.
+ The default decompressor parallelisation is SQUASHFS_DECOMP_SINGLE.
+
choice
- prompt "Decompressor parallelisation options"
+ prompt "Select decompression parallel mode at compile time"
depends on SQUASHFS
+ depends on !SQUASHFS_CHOICE_DECOMP_BY_MOUNT
help
Squashfs now supports three parallelisation options for
decompression. Each one exhibits various trade-offs between
@@ -64,15 +90,17 @@ choice
If in doubt, select "Single threaded compression"
-config SQUASHFS_DECOMP_SINGLE
+config SQUASHFS_COMPILE_DECOMP_SINGLE
bool "Single threaded compression"
+ select SQUASHFS_DECOMP_SINGLE
help
Traditionally Squashfs has used single-threaded decompression.
Only one block (data or metadata) can be decompressed at any
one time. This limits CPU and memory usage to a minimum.
-config SQUASHFS_DECOMP_MULTI
+config SQUASHFS_COMPILE_DECOMP_MULTI
bool "Use multiple decompressors for parallel I/O"
+ select SQUASHFS_DECOMP_MULTI
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
@@ -85,8 +113,9 @@ config SQUASHFS_DECOMP_MULTI
decompressors per core. It dynamically allocates decompressors
on a demand basis.
-config SQUASHFS_DECOMP_MULTI_PERCPU
+config SQUASHFS_COMPILE_DECOMP_MULTI_PERCPU
bool "Use percpu multiple decompressors for parallel I/O"
+ select SQUASHFS_DECOMP_MULTI_PERCPU
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
@@ -95,9 +124,21 @@ config SQUASHFS_DECOMP_MULTI_PERCPU
This decompressor implementation uses a maximum of one
decompressor per core. It uses percpu variables to ensure
decompression is load-balanced across the cores.
-
endchoice
+config SQUASHFS_MOUNT_DECOMP_THREADS
+ bool "Add the mount parameter 'threads=' for squashfs"
+ depends on SQUASHFS
+ depends on SQUASHFS_DECOMP_MULTI
+ default n
+ help
+ Use threads= to set the decompression parallel mode and the number of threads.
+ If SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
+ threads=<single|multi|percpu|1|2|3|...>
+ else
+ threads=<2|3|...>
+ The upper limit is num_online_cpus() * 2.
+
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 833aca92301f..bed3bb8b27fa 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -216,7 +216,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
res = -EIO;
goto out_free_bio;
}
- res = squashfs_decompress(msblk, bio, offset, length, output);
+ res = msblk->thread_ops->decompress(msblk, bio, offset, length, output);
} else {
res = copy_bio_to_actor(bio, output, offset, length);
}
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index d57bef91ab08..8893cb9b4198 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -134,7 +134,7 @@ void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
if (IS_ERR(comp_opts))
return comp_opts;
- stream = squashfs_decompressor_create(msblk, comp_opts);
+ stream = msblk->thread_ops->create(msblk, comp_opts);
if (IS_ERR(stream))
kfree(comp_opts);
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
index db9f12a3ea05..416c53eedbd1 100644
--- a/fs/squashfs/decompressor_multi.c
+++ b/fs/squashfs/decompressor_multi.c
@@ -29,12 +29,11 @@
#define MAX_DECOMPRESSOR (num_online_cpus() * 2)
-int squashfs_max_decompressors(void)
+static int squashfs_max_decompressors(void)
{
return MAX_DECOMPRESSOR;
}
-
struct squashfs_stream {
void *comp_opts;
struct list_head strm_list;
@@ -59,7 +58,7 @@ static void put_decomp_stream(struct decomp_stream *decomp_strm,
wake_up(&stream->wait);
}
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
@@ -103,7 +102,7 @@ out:
}
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream *stream = msblk->stream;
if (stream) {
@@ -145,7 +144,7 @@ static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
* If there is no available decomp and already full,
* let's wait for releasing decomp from other users.
*/
- if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+ if (stream->avail_decomp >= msblk->max_thread_num)
goto wait;
/* Let's allocate new decomp */
@@ -161,7 +160,7 @@ static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
}
stream->avail_decomp++;
- WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+ WARN_ON(stream->avail_decomp > msblk->max_thread_num);
mutex_unlock(&stream->mutex);
break;
@@ -180,7 +179,7 @@ wait:
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length,
struct squashfs_page_actor *output)
{
@@ -195,3 +194,10 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
msblk->decompressor->name);
return res;
}
+
+const struct squashfs_decompressor_thread_ops squashfs_decompressor_multi = {
+ .create = squashfs_decompressor_create,
+ .destroy = squashfs_decompressor_destroy,
+ .decompress = squashfs_decompress,
+ .max_decompressors = squashfs_max_decompressors,
+};
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
index b881b9283b7f..1dfadf76ed9a 100644
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -25,7 +25,7 @@ struct squashfs_stream {
local_lock_t lock;
};
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
@@ -59,7 +59,7 @@ out:
return ERR_PTR(err);
}
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
@@ -75,19 +75,21 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length, struct squashfs_page_actor *output)
{
struct squashfs_stream *stream;
+ struct squashfs_stream __percpu *percpu =
+ (struct squashfs_stream __percpu *) msblk->stream;
int res;
- local_lock(&msblk->stream->lock);
- stream = this_cpu_ptr(msblk->stream);
+ local_lock(&percpu->lock);
+ stream = this_cpu_ptr(percpu);
res = msblk->decompressor->decompress(msblk, stream->stream, bio,
offset, length, output);
- local_unlock(&msblk->stream->lock);
+ local_unlock(&percpu->lock);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
@@ -96,7 +98,14 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
return res;
}
-int squashfs_max_decompressors(void)
+static int squashfs_max_decompressors(void)
{
return num_possible_cpus();
}
+
+const struct squashfs_decompressor_thread_ops squashfs_decompressor_percpu = {
+ .create = squashfs_decompressor_create,
+ .destroy = squashfs_decompressor_destroy,
+ .decompress = squashfs_decompress,
+ .max_decompressors = squashfs_max_decompressors,
+};
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
index 4eb3d083d45e..6f161887710b 100644
--- a/fs/squashfs/decompressor_single.c
+++ b/fs/squashfs/decompressor_single.c
@@ -24,7 +24,7 @@ struct squashfs_stream {
struct mutex mutex;
};
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
@@ -49,7 +49,7 @@ out:
return ERR_PTR(err);
}
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream *stream = msblk->stream;
@@ -59,7 +59,7 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length,
struct squashfs_page_actor *output)
{
@@ -78,7 +78,14 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
return res;
}
-int squashfs_max_decompressors(void)
+static int squashfs_max_decompressors(void)
{
return 1;
}
+
+const struct squashfs_decompressor_thread_ops squashfs_decompressor_single = {
+ .create = squashfs_decompressor_create,
+ .destroy = squashfs_decompressor_destroy,
+ .decompress = squashfs_decompress,
+ .max_decompressors = squashfs_max_decompressors,
+};
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 9783e01c8100..a6164fdf9435 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -38,11 +38,24 @@ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
/* decompressor_xxx.c */
-extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
-extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
-extern int squashfs_decompress(struct squashfs_sb_info *, struct bio *,
- int, int, struct squashfs_page_actor *);
-extern int squashfs_max_decompressors(void);
+
+struct squashfs_decompressor_thread_ops {
+ void * (*create)(struct squashfs_sb_info *msblk, void *comp_opts);
+ void (*destroy)(struct squashfs_sb_info *msblk);
+ int (*decompress)(struct squashfs_sb_info *msblk, struct bio *bio,
+ int offset, int length, struct squashfs_page_actor *output);
+ int (*max_decompressors)(void);
+};
+
+#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE
+extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_single;
+#endif
+#ifdef CONFIG_SQUASHFS_DECOMP_MULTI
+extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_multi;
+#endif
+#ifdef CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU
+extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_percpu;
+#endif
/* export.c */
extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1e90c2575f9b..659082e9e51d 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -53,7 +53,7 @@ struct squashfs_sb_info {
__le64 *xattr_id_table;
struct mutex meta_index_mutex;
struct meta_index *meta_index;
- struct squashfs_stream *stream;
+ void *stream;
__le64 *inode_lookup_table;
u64 inode_table;
u64 directory_table;
@@ -66,5 +66,7 @@ struct squashfs_sb_info {
int xattr_ids;
unsigned int ids;
bool panic_on_errors;
+ const struct squashfs_decompressor_thread_ops *thread_ops;
+ int max_thread_num;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 32565dafa7f3..7d5265a39d20 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -47,10 +47,13 @@ enum Opt_errors {
enum squashfs_param {
Opt_errors,
+ Opt_threads,
};
struct squashfs_mount_opts {
enum Opt_errors errors;
+ const struct squashfs_decompressor_thread_ops *thread_ops;
+ int thread_num;
};
static const struct constant_table squashfs_param_errors[] = {
@@ -61,9 +64,66 @@ static const struct constant_table squashfs_param_errors[] = {
static const struct fs_parameter_spec squashfs_fs_parameters[] = {
fsparam_enum("errors", Opt_errors, squashfs_param_errors),
+ fsparam_string("threads", Opt_threads),
{}
};
+
+static int squashfs_parse_param_threads_str(const char *str, struct squashfs_mount_opts *opts)
+{
+#ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT
+ if (strcmp(str, "single") == 0) {
+ opts->thread_ops = &squashfs_decompressor_single;
+ return 0;
+ }
+ if (strcmp(str, "multi") == 0) {
+ opts->thread_ops = &squashfs_decompressor_multi;
+ return 0;
+ }
+ if (strcmp(str, "percpu") == 0) {
+ opts->thread_ops = &squashfs_decompressor_percpu;
+ return 0;
+ }
+#endif
+ return -EINVAL;
+}
+
+static int squashfs_parse_param_threads_num(const char *str, struct squashfs_mount_opts *opts)
+{
+#ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS
+ int ret;
+ unsigned long num;
+
+ ret = kstrtoul(str, 0, &num);
+ if (ret != 0)
+ return -EINVAL;
+ if (num > 1) {
+ opts->thread_ops = &squashfs_decompressor_multi;
+ if (num > opts->thread_ops->max_decompressors())
+ return -EINVAL;
+ opts->thread_num = (int)num;
+ return 0;
+ }
+#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE
+ if (num == 1) {
+ opts->thread_ops = &squashfs_decompressor_single;
+ opts->thread_num = 1;
+ return 0;
+ }
+#endif
+#endif /* CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS */
+ return -EINVAL;
+}
+
+static int squashfs_parse_param_threads(const char *str, struct squashfs_mount_opts *opts)
+{
+ int ret = squashfs_parse_param_threads_str(str, opts);
+
+ if (ret == 0)
+ return ret;
+ return squashfs_parse_param_threads_num(str, opts);
+}
+
static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct squashfs_mount_opts *opts = fc->fs_private;
@@ -78,6 +138,10 @@ static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *para
case Opt_errors:
opts->errors = result.uint_32;
break;
+ case Opt_threads:
+ if (squashfs_parse_param_threads(param->string, opts) != 0)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
@@ -133,6 +197,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
return -ENOMEM;
}
msblk = sb->s_fs_info;
+ msblk->thread_ops = opts->thread_ops;
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
@@ -168,6 +233,12 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto failed_mount;
}
+ if (opts->thread_num == 0) {
+ msblk->max_thread_num = msblk->thread_ops->max_decompressors();
+ } else {
+ msblk->max_thread_num = opts->thread_num;
+ }
+
/* Check the MAJOR & MINOR versions and lookup compression type */
msblk->decompressor = supported_squashfs_filesystem(
fc,
@@ -252,7 +323,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
/* Allocate read_page block */
msblk->read_page = squashfs_cache_init("data",
- squashfs_max_decompressors(), msblk->block_size);
+ msblk->max_thread_num, msblk->block_size);
if (msblk->read_page == NULL) {
errorf(fc, "Failed to allocate read_page block");
goto failed_mount;
@@ -383,7 +454,7 @@ failed_mount:
squashfs_cache_delete(msblk->block_cache);
squashfs_cache_delete(msblk->fragment_cache);
squashfs_cache_delete(msblk->read_page);
- squashfs_decompressor_destroy(msblk);
+ msblk->thread_ops->destroy(msblk);
kfree(msblk->inode_lookup_table);
kfree(msblk->fragment_index);
kfree(msblk->id_table);
@@ -435,6 +506,19 @@ static int squashfs_show_options(struct seq_file *s, struct dentry *root)
else
seq_puts(s, ",errors=continue");
+#ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT
+ if (msblk->thread_ops == &squashfs_decompressor_single) {
+ seq_puts(s, ",threads=single");
+ return 0;
+ }
+ if (msblk->thread_ops == &squashfs_decompressor_percpu) {
+ seq_puts(s, ",threads=percpu");
+ return 0;
+ }
+#endif
+#ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS
+ seq_printf(s, ",threads=%d", msblk->max_thread_num);
+#endif
return 0;
}
@@ -446,6 +530,16 @@ static int squashfs_init_fs_context(struct fs_context *fc)
if (!opts)
return -ENOMEM;
+#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE
+ opts->thread_ops = &squashfs_decompressor_single;
+#elif defined(CONFIG_SQUASHFS_DECOMP_MULTI)
+ opts->thread_ops = &squashfs_decompressor_multi;
+#elif defined(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU)
+ opts->thread_ops = &squashfs_decompressor_percpu;
+#else
+#error "fail: unknown squashfs decompression thread mode?"
+#endif
+ opts->thread_num = 0;
fc->fs_private = opts;
fc->ops = &squashfs_context_ops;
return 0;
@@ -478,7 +572,7 @@ static void squashfs_put_super(struct super_block *sb)
squashfs_cache_delete(sbi->block_cache);
squashfs_cache_delete(sbi->fragment_cache);
squashfs_cache_delete(sbi->read_page);
- squashfs_decompressor_destroy(sbi);
+ sbi->thread_ops->destroy(sbi);
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 3f128b9fdfbb..9c9d3f0e36a4 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2467,7 +2467,7 @@ error_dump:
static inline int chance(unsigned int n, unsigned int out_of)
{
- return !!(prandom_u32_max(out_of) + 1 <= n);
+ return !!(get_random_u32_below(out_of) + 1 <= n);
}
@@ -2485,13 +2485,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
if (chance(1, 2)) {
d->pc_delay = 1;
/* Fail within 1 minute */
- delay = prandom_u32_max(60000);
+ delay = get_random_u32_below(60000);
d->pc_timeout = jiffies;
d->pc_timeout += msecs_to_jiffies(delay);
ubifs_warn(c, "failing after %lums", delay);
} else {
d->pc_delay = 2;
- delay = prandom_u32_max(10000);
+ delay = get_random_u32_below(10000);
/* Fail within 10000 operations */
d->pc_cnt_max = delay;
ubifs_warn(c, "failing after %lu calls", delay);
@@ -2571,7 +2571,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
unsigned int from, to, ffs = chance(1, 2);
unsigned char *p = (void *)buf;
- from = prandom_u32_max(len);
+ from = get_random_u32_below(len);
/* Corruption span max to end of write unit */
to = min(len, ALIGN(from + 1, c->max_write_size));
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index cfbc31f709f4..c4d079328b92 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1970,28 +1970,28 @@ static int dbg_populate_lsave(struct ubifs_info *c)
if (!dbg_is_chk_gen(c))
return 0;
- if (prandom_u32_max(4))
+ if (get_random_u32_below(4))
return 0;
for (i = 0; i < c->lsave_cnt; i++)
c->lsave[i] = c->main_first;
list_for_each_entry(lprops, &c->empty_list, list)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum;
list_for_each_entry(lprops, &c->freeable_list, list)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum;
list_for_each_entry(lprops, &c->frdi_idx_list, list)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_FREE - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum;
return 1;
}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 01362ad5f804..a55e04822d16 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -700,7 +700,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
c->ilebs[c->ileb_cnt++] = lnum;
dbg_cmt("LEB %d", lnum);
}
- if (dbg_is_chk_index(c) && !prandom_u32_max(8))
+ if (dbg_is_chk_index(c) && !get_random_u32_below(8))
return -ENOSPC;
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index de79f5d07f65..989cf341779b 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1516,7 +1516,7 @@ xfs_alloc_ag_vextent_lastblock(
#ifdef DEBUG
/* Randomly don't execute the first algorithm. */
- if (prandom_u32_max(2))
+ if (get_random_u32_below(2))
return 0;
#endif
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 94db50eb706a..5118dedf9267 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -636,7 +636,7 @@ xfs_ialloc_ag_alloc(
/* randomly do sparse inode allocations */
if (xfs_has_sparseinodes(tp->t_mountp) &&
igeo->ialloc_min_blks < igeo->ialloc_blks)
- do_sparse = prandom_u32_max(2);
+ do_sparse = get_random_u32_below(2);
#endif
/*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index c6b2aabd6f18..822e6a0e9d1a 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -279,7 +279,7 @@ xfs_errortag_test(
ASSERT(error_tag < XFS_ERRTAG_MAX);
randfactor = mp->m_errortag[error_tag];
- if (!randfactor || prandom_u32_max(randfactor))
+ if (!randfactor || get_random_u32_below(randfactor))
return false;
xfs_warn_ratelimited(mp,
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index abc9a85106f2..2c53fbb8d918 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -41,6 +41,13 @@ static void zonefs_account_active(struct inode *inode)
return;
/*
+ * For zones that transitioned to the offline or readonly condition,
+ * we only need to clear the active state.
+ */
+ if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
+ goto out;
+
+ /*
* If the zone is active, that is, if it is explicitly open or
* partially written, check if it was already accounted as active.
*/
@@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode)
return;
}
+out:
/* The zone is not active. If it was, update the active count */
if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
@@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
inode->i_flags |= S_IMMUTABLE;
inode->i_mode &= ~0777;
zone->wp = zone->start;
+ zi->i_flags |= ZONEFS_ZONE_OFFLINE;
return 0;
case BLK_ZONE_COND_READONLY:
/*
@@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
zone->cond = BLK_ZONE_COND_OFFLINE;
inode->i_mode &= ~0777;
zone->wp = zone->start;
+ zi->i_flags |= ZONEFS_ZONE_OFFLINE;
return 0;
}
+ zi->i_flags |= ZONEFS_ZONE_READONLY;
inode->i_mode &= ~0222;
return i_size_read(inode);
case BLK_ZONE_COND_FULL:
@@ -1922,18 +1933,18 @@ static int __init zonefs_init(void)
if (ret)
return ret;
- ret = register_filesystem(&zonefs_type);
+ ret = zonefs_sysfs_init();
if (ret)
goto destroy_inodecache;
- ret = zonefs_sysfs_init();
+ ret = register_filesystem(&zonefs_type);
if (ret)
- goto unregister_fs;
+ goto sysfs_exit;
return 0;
-unregister_fs:
- unregister_filesystem(&zonefs_type);
+sysfs_exit:
+ zonefs_sysfs_exit();
destroy_inodecache:
zonefs_destroy_inodecache();
@@ -1942,9 +1953,9 @@ destroy_inodecache:
static void __exit zonefs_exit(void)
{
+ unregister_filesystem(&zonefs_type);
zonefs_sysfs_exit();
zonefs_destroy_inodecache();
- unregister_filesystem(&zonefs_type);
}
MODULE_AUTHOR("Damien Le Moal");
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 4b3de66c3233..1dbe78119ff1 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
return ZONEFS_ZTYPE_SEQ;
}
-#define ZONEFS_ZONE_OPEN (1 << 0)
-#define ZONEFS_ZONE_ACTIVE (1 << 1)
+#define ZONEFS_ZONE_OPEN (1U << 0)
+#define ZONEFS_ZONE_ACTIVE (1U << 1)
+#define ZONEFS_ZONE_OFFLINE (1U << 2)
+#define ZONEFS_ZONE_READONLY (1U << 3)
/*
* In-memory inode data.