summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/array.c16
-rw-r--r--fs/proc/base.c279
-rw-r--r--fs/proc/fd.c12
-rw-r--r--fs/proc/generic.c11
-rw-r--r--fs/proc/inode.c5
-rw-r--r--fs/proc/internal.h23
-rw-r--r--fs/proc/kcore.c5
-rw-r--r--fs/proc/proc_sysctl.c66
-rw-r--r--fs/proc/root.c3
-rw-r--r--fs/proc/stat.c64
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/proc/uptime.c7
-rw-r--r--fs/proc/vmcore.c8
14 files changed, 254 insertions, 251 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 51a4213afa2e..fe12b519d09b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -401,8 +401,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
unsigned long long start_time;
unsigned long cmin_flt = 0, cmaj_flt = 0;
unsigned long min_flt = 0, maj_flt = 0;
- cputime_t cutime, cstime, utime, stime;
- cputime_t cgtime, gtime;
+ u64 cutime, cstime, utime, stime;
+ u64 cgtime, gtime;
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
@@ -497,10 +497,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, " ", cmin_flt);
seq_put_decimal_ull(m, " ", maj_flt);
seq_put_decimal_ull(m, " ", cmaj_flt);
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime));
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(utime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(stime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cutime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cstime));
seq_put_decimal_ll(m, " ", priority);
seq_put_decimal_ll(m, " ", nice);
seq_put_decimal_ll(m, " ", num_threads);
@@ -542,8 +542,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, " ", task->rt_priority);
seq_put_decimal_ull(m, " ", task->policy);
seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task));
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(gtime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cgtime));
if (mm && permitted) {
seq_put_decimal_ull(m, " ", mm->start_data);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 87c9a9aacda3..1e1e182d571b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -292,101 +292,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
}
} else {
/*
- * Command line (1 string) occupies ARGV and maybe
- * extends into ENVP.
- */
- if (len1 + len2 <= *pos)
- goto skip_argv_envp;
- if (len1 <= *pos)
- goto skip_argv;
-
- p = arg_start + *pos;
- len = len1 - *pos;
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /*
- * Command line can be shorter than whole ARGV
- * even if last "marker" byte says it is not.
- */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
- }
-
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
- }
-
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
- }
-skip_argv:
- /*
* Command line (1 string) occupies ARGV and
* extends into ENVP.
*/
- if (len1 <= *pos) {
- p = env_start + *pos - len1;
- len = len1 + len2 - *pos;
- } else {
- p = env_start;
- len = len2;
+ struct {
+ unsigned long p;
+ unsigned long len;
+ } cmdline[2] = {
+ { .p = arg_start, .len = len1 },
+ { .p = env_start, .len = len2 },
+ };
+ loff_t pos1 = *pos;
+ unsigned int i;
+
+ i = 0;
+ while (i < 2 && pos1 >= cmdline[i].len) {
+ pos1 -= cmdline[i].len;
+ i++;
}
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /* Find EOS. */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
+ while (i < 2) {
+ p = cmdline[i].p + pos1;
+ len = cmdline[i].len - pos1;
+ while (count > 0 && len > 0) {
+ unsigned int _count, l;
+ int nr_read;
+ bool final;
+
+ _count = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, _count, 0);
+ if (nr_read < 0)
+ rv = nr_read;
+ if (nr_read <= 0)
+ goto out_free_page;
+
+ /*
+ * Command line can be shorter than whole ARGV
+ * even if last "marker" byte says it is not.
+ */
+ final = false;
+ l = strnlen(page, nr_read);
+ if (l < nr_read) {
+ nr_read = l;
+ final = true;
+ }
+
+ if (copy_to_user(buf, page, nr_read)) {
+ rv = -EFAULT;
+ goto out_free_page;
+ }
+
+ p += nr_read;
+ len -= nr_read;
+ buf += nr_read;
+ count -= nr_read;
+ rv += nr_read;
+
+ if (final)
+ goto out_free_page;
}
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
- }
-
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
+ /* Only first chunk can be read partially. */
+ pos1 = 0;
+ i++;
}
-skip_argv_envp:
- ;
}
out_free_page:
@@ -729,11 +697,11 @@ static int proc_pid_permission(struct inode *inode, int mask)
task = get_proc_task(inode);
if (!task)
return -ESRCH;
- has_perms = has_pid_permissions(pid, task, 1);
+ has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
put_task_struct(task);
if (!has_perms) {
- if (pid->hide_pid == 2) {
+ if (pid->hide_pid == HIDEPID_INVISIBLE) {
/*
* Let's make getdents(), stat(), and open()
* consistent with each other. If a process
@@ -798,7 +766,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
if (!IS_ERR_OR_NULL(mm)) {
/* ensure this mm_struct can't be freed */
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
/* but do not pin its memory */
mmput(mm);
}
@@ -845,7 +813,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
return -ENOMEM;
copied = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
/* Maybe we should limit FOLL_FORCE to actual ptrace users? */
@@ -953,7 +921,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
return -ENOMEM;
ret = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
down_read(&mm->mmap_sem);
@@ -1096,7 +1064,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
if (p) {
if (atomic_read(&p->mm->mm_users) > 1) {
mm = p->mm;
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
}
task_unlock(p);
}
@@ -1667,12 +1635,63 @@ const struct inode_operations proc_pid_link_inode_operations = {
/* building an inode */
+void task_dump_owner(struct task_struct *task, mode_t mode,
+ kuid_t *ruid, kgid_t *rgid)
+{
+ /* Depending on the state of dumpable compute who should own a
+ * proc file for a task.
+ */
+ const struct cred *cred;
+ kuid_t uid;
+ kgid_t gid;
+
+ /* Default to the tasks effective ownership */
+ rcu_read_lock();
+ cred = __task_cred(task);
+ uid = cred->euid;
+ gid = cred->egid;
+ rcu_read_unlock();
+
+ /*
+ * Before the /proc/pid/status file was created the only way to read
+ * the effective uid of a /process was to stat /proc/pid. Reading
+ * /proc/pid/status is slow enough that procps and other packages
+ * kept stating /proc/pid. To keep the rules in /proc simple I have
+ * made this apply to all per process world readable and executable
+ * directories.
+ */
+ if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
+ struct mm_struct *mm;
+ task_lock(task);
+ mm = task->mm;
+ /* Make non-dumpable tasks owned by some root */
+ if (mm) {
+ if (get_dumpable(mm) != SUID_DUMP_USER) {
+ struct user_namespace *user_ns = mm->user_ns;
+
+ uid = make_kuid(user_ns, 0);
+ if (!uid_valid(uid))
+ uid = GLOBAL_ROOT_UID;
+
+ gid = make_kgid(user_ns, 0);
+ if (!gid_valid(gid))
+ gid = GLOBAL_ROOT_GID;
+ }
+ } else {
+ uid = GLOBAL_ROOT_UID;
+ gid = GLOBAL_ROOT_GID;
+ }
+ task_unlock(task);
+ }
+ *ruid = uid;
+ *rgid = gid;
+}
+
struct inode *proc_pid_make_inode(struct super_block * sb,
struct task_struct *task, umode_t mode)
{
struct inode * inode;
struct proc_inode *ei;
- const struct cred *cred;
/* We need a new inode */
@@ -1694,13 +1713,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
if (!ei->pid)
goto out_unlock;
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
security_task_to_inode(task, inode);
out:
@@ -1715,7 +1728,6 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = d_inode(dentry);
struct task_struct *task;
- const struct cred *cred;
struct pid_namespace *pid = dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
@@ -1725,7 +1737,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
stat->gid = GLOBAL_ROOT_GID;
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
- if (!has_pid_permissions(pid, task, 2)) {
+ if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
rcu_read_unlock();
/*
* This doesn't prevent learning whether PID exists,
@@ -1733,12 +1745,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
*/
return -ENOENT;
}
- if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
- task_dumpable(task)) {
- cred = __task_cred(task);
- stat->uid = cred->euid;
- stat->gid = cred->egid;
- }
+ task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
}
rcu_read_unlock();
return 0;
@@ -1754,18 +1761,11 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
* Rewrite the inode's ownerships here because the owning task may have
* performed a setuid(), etc.
*
- * Before the /proc/pid/status file was created the only way to read
- * the effective uid of a /process was to stat /proc/pid. Reading
- * /proc/pid/status is slow enough that procps and other packages
- * kept stating /proc/pid. To keep the rules in /proc simple I have
- * made this apply to all per process world readable and executable
- * directories.
*/
int pid_revalidate(struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
struct task_struct *task;
- const struct cred *cred;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -1774,17 +1774,8 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
task = get_proc_task(inode);
if (task) {
- if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
- task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+
inode->i_mode &= ~(S_ISUID | S_ISGID);
security_task_to_inode(task, inode);
put_task_struct(task);
@@ -1881,7 +1872,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
bool exact_vma_exists = false;
struct mm_struct *mm = NULL;
struct task_struct *task;
- const struct cred *cred;
struct inode *inode;
int status = 0;
@@ -1906,16 +1896,8 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
mmput(mm);
if (exact_vma_exists) {
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+
security_task_to_inode(task, inode);
status = 1;
}
@@ -2179,7 +2161,7 @@ static const struct file_operations proc_map_files_operations = {
.llseek = generic_file_llseek,
};
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
struct timers_private {
struct pid *pid;
struct task_struct *task;
@@ -2488,6 +2470,12 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
length = -ESRCH;
if (!task)
goto out_no_task;
+
+ /* A task may only write its own attributes. */
+ length = -EACCES;
+ if (current != task)
+ goto out;
+
if (count > PAGE_SIZE)
count = PAGE_SIZE;
@@ -2503,14 +2491,13 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
}
/* Guard against adverse ptrace interaction */
- length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+ length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
if (length < 0)
goto out_free;
- length = security_setprocattr(task,
- (char*)file->f_path.dentry->d_name.name,
+ length = security_setprocattr(file->f_path.dentry->d_name.name,
page, count);
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
kfree(page);
out:
@@ -2936,7 +2923,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
REG("timers", S_IRUGO, proc_timers_operations),
#endif
REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
@@ -3181,7 +3168,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
int len;
cond_resched();
- if (!has_pid_permissions(ns, iter.task, 2))
+ if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
continue;
len = snprintf(name, sizeof(name), "%d", iter.tgid);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 4274f83bf100..00ce1531b2f5 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -84,7 +84,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
{
struct files_struct *files;
struct task_struct *task;
- const struct cred *cred;
struct inode *inode;
unsigned int fd;
@@ -108,16 +107,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
rcu_read_unlock();
put_files_struct(files);
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
if (S_ISLNK(inode->i_mode)) {
unsigned i_mode = S_IFLNK;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f6a01f09f79d..06c73904d497 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
struct rb_node *node = dir->subdir.rb_node;
while (node) {
- struct proc_dir_entry *de = container_of(node,
- struct proc_dir_entry,
- subdir_node);
+ struct proc_dir_entry *de = rb_entry(node,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(len, name, de);
if (result < 0)
@@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
/* Figure out where to put new node */
while (*new) {
- struct proc_dir_entry *this =
- container_of(*new, struct proc_dir_entry, subdir_node);
+ struct proc_dir_entry *this = rb_entry(*new,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(de->namelen, de->name, this);
parent = *new;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 842a5ff5b85c..2cc7a8030275 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -43,10 +43,11 @@ static void proc_evict_inode(struct inode *inode)
de = PDE(inode);
if (de)
pde_put(de);
+
head = PROC_I(inode)->sysctl;
if (head) {
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
- sysctl_head_put(head);
+ proc_sys_evict_inode(inode, head);
}
}
@@ -106,7 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
- if (pid->hide_pid != 0)
+ if (pid->hide_pid != HIDEPID_OFF)
seq_printf(seq, ",hidepid=%u", pid->hide_pid);
return 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 2de5194ba378..5d6960f5f1c0 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,6 +65,7 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
+ struct list_head sysctl_inodes;
const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
};
@@ -97,20 +98,8 @@ static inline struct task_struct *get_proc_task(struct inode *inode)
return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
-static inline int task_dumpable(struct task_struct *task)
-{
- int dumpable = 0;
- struct mm_struct *mm;
-
- task_lock(task);
- mm = task->mm;
- if (mm)
- dumpable = get_dumpable(mm);
- task_unlock(task);
- if (dumpable == SUID_DUMP_USER)
- return 1;
- return 0;
-}
+void task_dump_owner(struct task_struct *task, mode_t mode,
+ kuid_t *ruid, kgid_t *rgid);
static inline unsigned name_to_int(const struct qstr *qstr)
{
@@ -249,10 +238,12 @@ extern void proc_thread_self_init(void);
*/
#ifdef CONFIG_PROC_SYSCTL
extern int proc_sys_init(void);
-extern void sysctl_head_put(struct ctl_table_header *);
+extern void proc_sys_evict_inode(struct inode *inode,
+ struct ctl_table_header *head);
#else
static inline void proc_sys_init(void) { }
-static inline void sysctl_head_put(struct ctl_table_header *head) { }
+static inline void proc_sys_evict_inode(struct inode *inode,
+ struct ctl_table_header *head) { }
#endif
/*
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0b80ad87b4d6..ea9f3d1ae830 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
phdr->p_vaddr = (size_t)m->addr;
- phdr->p_paddr = 0;
+ if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
phdr->p_filesz = phdr->p_memsz = m->size;
phdr->p_align = PAGE_SIZE;
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d4e37acd4821..3e64c6502dc8 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -190,6 +190,7 @@ static void init_header(struct ctl_table_header *head,
head->set = set;
head->parent = NULL;
head->node = node;
+ INIT_LIST_HEAD(&head->inodes);
if (node) {
struct ctl_table *entry;
for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +260,27 @@ static void unuse_table(struct ctl_table_header *p)
complete(p->unregistering);
}
+/* called under sysctl_lock */
+static void proc_sys_prune_dcache(struct ctl_table_header *head)
+{
+ struct inode *inode, *prev = NULL;
+ struct proc_inode *ei;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
+ inode = igrab(&ei->vfs_inode);
+ if (inode) {
+ rcu_read_unlock();
+ iput(prev);
+ prev = inode;
+ d_prune_aliases(inode);
+ rcu_read_lock();
+ }
+ }
+ rcu_read_unlock();
+ iput(prev);
+}
+
/* called under sysctl_lock, will reacquire if has to wait */
static void start_unregistering(struct ctl_table_header *p)
{
@@ -272,31 +294,22 @@ static void start_unregistering(struct ctl_table_header *p)
p->unregistering = &wait;
spin_unlock(&sysctl_lock);
wait_for_completion(&wait);
- spin_lock(&sysctl_lock);
} else {
/* anything non-NULL; we'll never dereference it */
p->unregistering = ERR_PTR(-EINVAL);
+ spin_unlock(&sysctl_lock);
}
/*
+ * Prune dentries for unregistered sysctls: namespaced sysctls
+ * can have duplicate names and contaminate dcache very badly.
+ */
+ proc_sys_prune_dcache(p);
+ /*
* do not remove from the list until nobody holds it; walking the
* list in do_sysctl() relies on that.
*/
- erase_header(p);
-}
-
-static void sysctl_head_get(struct ctl_table_header *head)
-{
spin_lock(&sysctl_lock);
- head->count++;
- spin_unlock(&sysctl_lock);
-}
-
-void sysctl_head_put(struct ctl_table_header *head)
-{
- spin_lock(&sysctl_lock);
- if (!--head->count)
- kfree_rcu(head, rcu);
- spin_unlock(&sysctl_lock);
+ erase_header(p);
}
static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
@@ -440,10 +453,20 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
inode->i_ino = get_next_ino();
- sysctl_head_get(head);
ei = PROC_I(inode);
+
+ spin_lock(&sysctl_lock);
+ if (unlikely(head->unregistering)) {
+ spin_unlock(&sysctl_lock);
+ iput(inode);
+ inode = NULL;
+ goto out;
+ }
ei->sysctl = head;
ei->sysctl_entry = table;
+ list_add_rcu(&ei->sysctl_inodes, &head->inodes);
+ head->count++;
+ spin_unlock(&sysctl_lock);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_mode = table->mode;
@@ -466,6 +489,15 @@ out:
return inode;
}
+void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
+{
+ spin_lock(&sysctl_lock);
+ list_del_rcu(&PROC_I(inode)->sysctl_inodes);
+ if (!--head->count)
+ kfree_rcu(head, rcu);
+ spin_unlock(&sysctl_lock);
+}
+
static struct ctl_table_header *grab_header(struct inode *inode)
{
struct ctl_table_header *head = PROC_I(inode)->sysctl;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 1988440b2049..b90da888b81a 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -58,7 +58,8 @@ int proc_parse_options(char *options, struct pid_namespace *pid)
case Opt_hidepid:
if (match_int(&args[0], &option))
return 0;
- if (option < 0 || option > 2) {
+ if (option < HIDEPID_OFF ||
+ option > HIDEPID_INVISIBLE) {
pr_err("proc: hidepid value must be between 0 and 2.\n");
return 0;
}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index d700c42b3572..e47c3e8c4dfe 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -21,9 +21,9 @@
#ifdef arch_idle_time
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
{
- cputime64_t idle;
+ u64 idle;
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
@@ -31,9 +31,9 @@ static cputime64_t get_idle_time(int cpu)
return idle;
}
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
{
- cputime64_t iowait;
+ u64 iowait;
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
if (cpu_online(cpu) && nr_iowait_cpu(cpu))
@@ -45,32 +45,32 @@ static cputime64_t get_iowait_time(int cpu)
static u64 get_idle_time(int cpu)
{
- u64 idle, idle_time = -1ULL;
+ u64 idle, idle_usecs = -1ULL;
if (cpu_online(cpu))
- idle_time = get_cpu_idle_time_us(cpu, NULL);
+ idle_usecs = get_cpu_idle_time_us(cpu, NULL);
- if (idle_time == -1ULL)
+ if (idle_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
else
- idle = usecs_to_cputime64(idle_time);
+ idle = idle_usecs * NSEC_PER_USEC;
return idle;
}
static u64 get_iowait_time(int cpu)
{
- u64 iowait, iowait_time = -1ULL;
+ u64 iowait, iowait_usecs = -1ULL;
if (cpu_online(cpu))
- iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
- if (iowait_time == -1ULL)
+ if (iowait_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
else
- iowait = usecs_to_cputime64(iowait_time);
+ iowait = iowait_usecs * NSEC_PER_USEC;
return iowait;
}
@@ -115,16 +115,16 @@ static int show_stat(struct seq_file *p, void *v)
}
sum += arch_irq_stat();
- seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
seq_putc(p, '\n');
for_each_online_cpu(i) {
@@ -140,16 +140,16 @@ static int show_stat(struct seq_file *p, void *v)
guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
seq_putc(p, '\n');
}
seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8f96a49178d0..ee3efb229ef6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -167,7 +167,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
@@ -1352,7 +1352,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end_vaddr;
int ret = 0, copied = 0;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
goto out;
ret = -EINVAL;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 37175621e890..1ef97cfcf422 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -219,7 +219,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 33de567c25af..7981c4ffe787 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -5,23 +5,20 @@
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
-#include <linux/cputime.h>
static int uptime_proc_show(struct seq_file *m, void *v)
{
struct timespec uptime;
struct timespec idle;
- u64 idletime;
u64 nsec;
u32 rem;
int i;
- idletime = 0;
+ nsec = 0;
for_each_possible_cpu(i)
- idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
+ nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
get_monotonic_boottime(&uptime);
- nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5105b1599981..885d445afa0d 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
*/
-static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int mmap_vmcore_fault(struct vm_fault *vmf)
{
#ifdef CONFIG_S390
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
pgoff_t index = vmf->pgoff;
struct page *page;
loff_t offset;
@@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
}
return 0;
fail:
- do_munmap(vma->vm_mm, from, len);
+ do_munmap(vma->vm_mm, from, len, NULL);
return -EAGAIN;
}
@@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
return 0;
fail:
- do_munmap(vma->vm_mm, vma->vm_start, len);
+ do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
return -EAGAIN;
}
#else