summaryrefslogtreecommitdiff
path: root/ipc
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2010-01-05 03:17:33 +0300
committerTejun Heo <tj@kernel.org>2010-01-05 03:17:33 +0300
commit32032df6c2f6c9c6b2ada2ce42322231824f70c2 (patch)
treeb1ce838a37044bb38dfc128e2116ca35630e629a /ipc
parent22b737f4c75197372d64afc6ed1bccd58c00e549 (diff)
parentc5974b835a909ff15c3b7e6cf6789b5eb919f419 (diff)
downloadlinux-32032df6c2f6c9c6b2ada2ce42322231824f70c2.tar.xz
Merge branch 'master' into percpu
Conflicts: arch/powerpc/platforms/pseries/hvCall.S include/linux/percpu.h
Diffstat (limited to 'ipc')
-rw-r--r--ipc/ipc_sysctl.c77
-rw-r--r--ipc/mq_sysctl.c7
-rw-r--r--ipc/mqueue.c2
-rw-r--r--ipc/msg.c3
-rw-r--r--ipc/sem.c214
-rw-r--r--ipc/shm.c40
6 files changed, 171 insertions, 172 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 7d3704750efc..56410faa4550 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -129,136 +129,60 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
#define proc_ipcauto_dointvec_minmax NULL
#endif
-#ifdef CONFIG_SYSCTL_SYSCALL
-/* The generic sysctl ipc data routine. */
-static int sysctl_ipc_data(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- size_t len;
- void *data;
-
- /* Get out of I don't have a variable */
- if (!table->data || !table->maxlen)
- return -ENOTDIR;
-
- data = get_ipc(table);
- if (!data)
- return -ENOTDIR;
-
- if (oldval && oldlenp) {
- if (get_user(len, oldlenp))
- return -EFAULT;
- if (len) {
- if (len > table->maxlen)
- len = table->maxlen;
- if (copy_to_user(oldval, data, len))
- return -EFAULT;
- if (put_user(len, oldlenp))
- return -EFAULT;
- }
- }
-
- if (newval && newlen) {
- if (newlen > table->maxlen)
- newlen = table->maxlen;
-
- if (copy_from_user(data, newval, newlen))
- return -EFAULT;
- }
- return 1;
-}
-
-static int sysctl_ipc_registered_data(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- int rc;
-
- rc = sysctl_ipc_data(table, oldval, oldlenp, newval, newlen);
-
- if (newval && newlen && rc > 0)
- /*
- * Tunable has successfully been changed from userland
- */
- unregister_ipcns_notifier(current->nsproxy->ipc_ns);
-
- return rc;
-}
-#else
-#define sysctl_ipc_data NULL
-#define sysctl_ipc_registered_data NULL
-#endif
-
static int zero;
static int one = 1;
static struct ctl_table ipc_kern_table[] = {
{
- .ctl_name = KERN_SHMMAX,
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof (init_ipc_ns.shm_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_SHMALL,
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof (init_ipc_ns.shm_ctlall),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_SHMMNI,
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof (init_ipc_ns.shm_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_MSGMAX,
.procname = "msgmax",
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof (init_ipc_ns.msg_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_MSGMNI,
.procname = "msgmni",
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof (init_ipc_ns.msg_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_callback_dointvec,
- .strategy = sysctl_ipc_registered_data,
},
{
- .ctl_name = KERN_MSGMNB,
.procname = "msgmnb",
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof (init_ipc_ns.msg_ctlmnb),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_SEM,
.procname = "sem",
.data = &init_ipc_ns.sem_ctls,
.maxlen = 4*sizeof (int),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "auto_msgmni",
.data = &init_ipc_ns.auto_msgmni,
.maxlen = sizeof(int),
@@ -272,7 +196,6 @@ static struct ctl_table ipc_kern_table[] = {
static struct ctl_table ipc_root_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = ipc_kern_table,
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 8a058711fc10..0c09366b96f3 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -88,7 +88,7 @@ static ctl_table mq_sysctls[] = {
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
- { .ctl_name = 0 }
+ {}
};
static ctl_table mq_sysctl_dir[] = {
@@ -97,17 +97,16 @@ static ctl_table mq_sysctl_dir[] = {
.mode = 0555,
.child = mq_sysctls,
},
- { .ctl_name = 0 }
+ {}
};
static ctl_table mq_sysctl_root[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = mq_sysctl_dir,
},
- { .ctl_name = 0 }
+ {}
};
struct ctl_table_header *mq_register_sysctl_table(void)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ee9d69707c0a..c79bd57353e7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -32,7 +32,6 @@
#include <linux/nsproxy.h>
#include <linux/pid.h>
#include <linux/ipc_namespace.h>
-#include <linux/ima.h>
#include <net/sock.h>
#include "util.h"
@@ -734,7 +733,6 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
error = PTR_ERR(filp);
goto out_putfd;
}
- ima_counts_get(filp);
fd_install(fd, filp);
goto out_upsem;
diff --git a/ipc/msg.c b/ipc/msg.c
index 2ceab7f12fcb..af42ef8900a6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -125,6 +125,7 @@ void msg_init_ns(struct ipc_namespace *ns)
void msg_exit_ns(struct ipc_namespace *ns)
{
free_ipcs(ns, &msg_ids(ns), freeque);
+ idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
}
#endif
@@ -412,7 +413,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
struct msqid_ds __user *buf, int version)
{
struct kern_ipc_perm *ipcp;
- struct msqid64_ds msqid64;
+ struct msqid64_ds uninitialized_var(msqid64);
struct msg_queue *msq;
int err;
diff --git a/ipc/sem.c b/ipc/sem.c
index 87c2b641fd7b..dbef95b15941 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -129,6 +129,7 @@ void sem_init_ns(struct ipc_namespace *ns)
void sem_exit_ns(struct ipc_namespace *ns)
{
free_ipcs(ns, &sem_ids(ns), freeary);
+ idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
}
#endif
@@ -240,6 +241,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
key_t key = params->key;
int nsems = params->u.nsems;
int semflg = params->flg;
+ int i;
if (!nsems)
return -EINVAL;
@@ -272,6 +274,11 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
ns->used_sems += nsems;
sma->sem_base = (struct sem *) &sma[1];
+
+ for (i = 0; i < nsems; i++)
+ INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+
+ sma->complex_count = 0;
INIT_LIST_HEAD(&sma->sem_pending);
INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
@@ -397,63 +404,109 @@ undo:
return result;
}
-/* Go through the pending queue for the indicated semaphore
- * looking for tasks that can be completed.
+/*
+ * Wake up a process waiting on the sem queue with a given error.
+ * The queue is invalid (may not be accessed) after the function returns.
*/
-static void update_queue (struct sem_array * sma)
+static void wake_up_sem_queue(struct sem_queue *q, int error)
{
- int error;
- struct sem_queue * q;
+ /*
+ * Hold preempt off so that we don't get preempted and have the
+ * wakee busy-wait until we're scheduled back on. We're holding
+ * locks here so it may not strictly be needed, however if the
+ * locks become preemptible then this prevents such a problem.
+ */
+ preempt_disable();
+ q->status = IN_WAKEUP;
+ wake_up_process(q->sleeper);
+ /* hands-off: q can disappear immediately after writing q->status. */
+ smp_wmb();
+ q->status = error;
+ preempt_enable();
+}
+
+static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
+{
+ list_del(&q->list);
+ if (q->nsops == 1)
+ list_del(&q->simple_list);
+ else
+ sma->complex_count--;
+}
+
+
+/**
+ * update_queue(sma, semnum): Look for tasks that can be completed.
+ * @sma: semaphore array.
+ * @semnum: semaphore that was modified.
+ *
+ * update_queue must be called after a semaphore in a semaphore array
+ * was modified. If multiple semaphore were modified, then @semnum
+ * must be set to -1.
+ */
+static void update_queue(struct sem_array *sma, int semnum)
+{
+ struct sem_queue *q;
+ struct list_head *walk;
+ struct list_head *pending_list;
+ int offset;
+
+ /* if there are complex operations around, then knowing the semaphore
+ * that was modified doesn't help us. Assume that multiple semaphores
+ * were modified.
+ */
+ if (sma->complex_count)
+ semnum = -1;
+
+ if (semnum == -1) {
+ pending_list = &sma->sem_pending;
+ offset = offsetof(struct sem_queue, list);
+ } else {
+ pending_list = &sma->sem_base[semnum].sem_pending;
+ offset = offsetof(struct sem_queue, simple_list);
+ }
+
+again:
+ walk = pending_list->next;
+ while (walk != pending_list) {
+ int error, alter;
+
+ q = (struct sem_queue *)((char *)walk - offset);
+ walk = walk->next;
+
+ /* If we are scanning the single sop, per-semaphore list of
+ * one semaphore and that semaphore is 0, then it is not
+ * necessary to scan the "alter" entries: simple increments
+ * that affect only one entry succeed immediately and cannot
+ * be in the per semaphore pending queue, and decrements
+ * cannot be successful if the value is already 0.
+ */
+ if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
+ q->alter)
+ break;
- q = list_entry(sma->sem_pending.next, struct sem_queue, list);
- while (&q->list != &sma->sem_pending) {
error = try_atomic_semop(sma, q->sops, q->nsops,
q->undo, q->pid);
/* Does q->sleeper still need to sleep? */
- if (error <= 0) {
- struct sem_queue *n;
-
- /*
- * Continue scanning. The next operation
- * that must be checked depends on the type of the
- * completed operation:
- * - if the operation modified the array, then
- * restart from the head of the queue and
- * check for threads that might be waiting
- * for semaphore values to become 0.
- * - if the operation didn't modify the array,
- * then just continue.
- * The order of list_del() and reading ->next
- * is crucial: In the former case, the list_del()
- * must be done first [because we might be the
- * first entry in ->sem_pending], in the latter
- * case the list_del() must be done last
- * [because the list is invalid after the list_del()]
- */
- if (q->alter) {
- list_del(&q->list);
- n = list_entry(sma->sem_pending.next,
- struct sem_queue, list);
- } else {
- n = list_entry(q->list.next, struct sem_queue,
- list);
- list_del(&q->list);
- }
-
- /* wake up the waiting thread */
- q->status = IN_WAKEUP;
+ if (error > 0)
+ continue;
- wake_up_process(q->sleeper);
- /* hands-off: q will disappear immediately after
- * writing q->status.
- */
- smp_wmb();
- q->status = error;
- q = n;
- } else {
- q = list_entry(q->list.next, struct sem_queue, list);
- }
+ unlink_queue(sma, q);
+
+ /*
+ * The next operation that must be checked depends on the type
+ * of the completed operation:
+ * - if the operation modified the array, then restart from the
+ * head of the queue and check for threads that might be
+ * waiting for the new semaphore values.
+ * - if the operation didn't modify the array, then just
+ * continue.
+ */
+ alter = q->alter;
+ wake_up_sem_queue(q, error);
+ if (alter && !error)
+ goto again;
}
}
@@ -533,12 +586,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
/* Wake up all pending processes and let them fail with EIDRM. */
list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
- list_del(&q->list);
-
- q->status = IN_WAKEUP;
- wake_up_process(q->sleeper); /* doesn't sleep */
- smp_wmb();
- q->status = -EIDRM; /* hands-off q */
+ unlink_queue(sma, q);
+ wake_up_sem_queue(q, -EIDRM);
}
/* Remove the semaphore set from the IDR */
@@ -575,7 +624,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
static int semctl_nolock(struct ipc_namespace *ns, int semid,
int cmd, int version, union semun arg)
{
- int err = -EINVAL;
+ int err;
struct sem_array *sma;
switch(cmd) {
@@ -652,7 +701,6 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
default:
return -EINVAL;
}
- return err;
out_unlock:
sem_unlock(sma);
return err;
@@ -759,7 +807,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
}
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ update_queue(sma, -1);
err = 0;
goto out_unlock;
}
@@ -801,7 +849,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
curr->sempid = task_tgid_vnr(current);
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ update_queue(sma, semnum);
err = 0;
goto out_unlock;
}
@@ -961,17 +1009,31 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
return 0;
}
-static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
+static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
{
- struct sem_undo *walk;
+ struct sem_undo *un;
- list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
- if (walk->semid == semid)
- return walk;
+ list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
+ if (un->semid == semid)
+ return un;
}
return NULL;
}
+static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
+{
+ struct sem_undo *un;
+
+ assert_spin_locked(&ulp->lock);
+
+ un = __lookup_undo(ulp, semid);
+ if (un) {
+ list_del_rcu(&un->list_proc);
+ list_add_rcu(&un->list_proc, &ulp->list_proc);
+ }
+ return un;
+}
+
/**
* find_alloc_undo - Lookup (and if not present create) undo array
* @ns: namespace
@@ -1163,7 +1225,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
if (error <= 0) {
if (alter && error == 0)
- update_queue (sma);
+ update_queue(sma, (nsops == 1) ? sops[0].sem_num : -1);
+
goto out_unlock_free;
}
@@ -1181,6 +1244,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
else
list_add(&queue.list, &sma->sem_pending);
+ if (nsops == 1) {
+ struct sem *curr;
+ curr = &sma->sem_base[sops->sem_num];
+
+ if (alter)
+ list_add_tail(&queue.simple_list, &curr->sem_pending);
+ else
+ list_add(&queue.simple_list, &curr->sem_pending);
+ } else {
+ INIT_LIST_HEAD(&queue.simple_list);
+ sma->complex_count++;
+ }
+
queue.status = -EINTR;
queue.sleeper = current;
current->state = TASK_INTERRUPTIBLE;
@@ -1222,7 +1298,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
- list_del(&queue.list);
+ unlink_queue(sma, &queue);
out_unlock_free:
sem_unlock(sma);
@@ -1307,7 +1383,7 @@ void exit_sem(struct task_struct *tsk)
if (IS_ERR(sma))
continue;
- un = lookup_undo(ulp, semid);
+ un = __lookup_undo(ulp, semid);
if (un == NULL) {
/* exit_sem raced with IPC_RMID+semget() that created
* exactly the same semid. Nothing to do.
@@ -1351,7 +1427,7 @@ void exit_sem(struct task_struct *tsk)
}
sma->sem_otime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ update_queue(sma, -1);
sem_unlock(sma);
call_rcu(&un->rcu, free_un);
@@ -1365,7 +1441,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
struct sem_array *sma = it;
return seq_printf(s,
- "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
+ "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
sma->sem_perm.key,
sma->sem_perm.id,
sma->sem_perm.mode,
diff --git a/ipc/shm.c b/ipc/shm.c
index 464694e0aa4a..92fe9236258b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -39,7 +39,6 @@
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>
-#include <linux/ima.h>
#include <asm/uaccess.h>
@@ -101,6 +100,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
void shm_exit_ns(struct ipc_namespace *ns)
{
free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
+ idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif
@@ -290,28 +290,28 @@ static unsigned long shm_get_unmapped_area(struct file *file,
unsigned long flags)
{
struct shm_file_data *sfd = shm_file_data(file);
- return get_unmapped_area(sfd->file, addr, len, pgoff, flags);
-}
-
-int is_file_shm_hugepages(struct file *file)
-{
- int ret = 0;
-
- if (file->f_op == &shm_file_operations) {
- struct shm_file_data *sfd;
- sfd = shm_file_data(file);
- ret = is_file_hugepages(sfd->file);
- }
- return ret;
+ return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
+ pgoff, flags);
}
static const struct file_operations shm_file_operations = {
.mmap = shm_mmap,
.fsync = shm_fsync,
.release = shm_release,
+};
+
+static const struct file_operations shm_file_operations_huge = {
+ .mmap = shm_mmap,
+ .fsync = shm_fsync,
+ .release = shm_release,
.get_unmapped_area = shm_get_unmapped_area,
};
+int is_file_shm_hugepages(struct file *file)
+{
+ return file->f_op == &shm_file_operations_huge;
+}
+
static const struct vm_operations_struct shm_vm_ops = {
.open = shm_open, /* callback for a new vm-area open */
.close = shm_close, /* callback for when the vm-area is released */
@@ -878,8 +878,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
if (err)
goto out_unlock;
- path.dentry = dget(shp->shm_file->f_path.dentry);
- path.mnt = shp->shm_file->f_path.mnt;
+ path = shp->shm_file->f_path;
+ path_get(&path);
shp->shm_nattch++;
size = i_size_read(path.dentry->d_inode);
shm_unlock(shp);
@@ -889,10 +889,12 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
if (!sfd)
goto out_put_dentry;
- file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
+ file = alloc_file(&path, f_mode,
+ is_file_hugepages(shp->shm_file) ?
+ &shm_file_operations_huge :
+ &shm_file_operations);
if (!file)
goto out_free;
- ima_counts_get(file);
file->private_data = sfd;
file->f_mapping = shp->shm_file->f_mapping;
@@ -947,7 +949,7 @@ out_unlock:
out_free:
kfree(sfd);
out_put_dentry:
- dput(path.dentry);
+ path_put(&path);
goto out_nattch;
}