summaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
Diffstat (limited to 'ipc')
-rw-r--r--ipc/Makefile2
-rw-r--r--ipc/ipc_sysctl.c77
-rw-r--r--ipc/mq_sysctl.c7
-rw-r--r--ipc/mqueue.c124
-rw-r--r--ipc/msg.c3
-rw-r--r--ipc/sem.c214
-rw-r--r--ipc/shm.c46
-rw-r--r--ipc/syscall.c99
8 files changed, 348 insertions, 224 deletions
diff --git a/ipc/Makefile b/ipc/Makefile
index 4e1955ea815d..9075e172e52c 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -3,7 +3,7 @@
#
obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
-obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o
+obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o syscall.o
obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
obj_mq-$(CONFIG_COMPAT) += compat_mq.o
obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 7d3704750efc..56410faa4550 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -129,136 +129,60 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
#define proc_ipcauto_dointvec_minmax NULL
#endif
-#ifdef CONFIG_SYSCTL_SYSCALL
-/* The generic sysctl ipc data routine. */
-static int sysctl_ipc_data(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- size_t len;
- void *data;
-
- /* Get out of I don't have a variable */
- if (!table->data || !table->maxlen)
- return -ENOTDIR;
-
- data = get_ipc(table);
- if (!data)
- return -ENOTDIR;
-
- if (oldval && oldlenp) {
- if (get_user(len, oldlenp))
- return -EFAULT;
- if (len) {
- if (len > table->maxlen)
- len = table->maxlen;
- if (copy_to_user(oldval, data, len))
- return -EFAULT;
- if (put_user(len, oldlenp))
- return -EFAULT;
- }
- }
-
- if (newval && newlen) {
- if (newlen > table->maxlen)
- newlen = table->maxlen;
-
- if (copy_from_user(data, newval, newlen))
- return -EFAULT;
- }
- return 1;
-}
-
-static int sysctl_ipc_registered_data(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- int rc;
-
- rc = sysctl_ipc_data(table, oldval, oldlenp, newval, newlen);
-
- if (newval && newlen && rc > 0)
- /*
- * Tunable has successfully been changed from userland
- */
- unregister_ipcns_notifier(current->nsproxy->ipc_ns);
-
- return rc;
-}
-#else
-#define sysctl_ipc_data NULL
-#define sysctl_ipc_registered_data NULL
-#endif
-
static int zero;
static int one = 1;
static struct ctl_table ipc_kern_table[] = {
{
- .ctl_name = KERN_SHMMAX,
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof (init_ipc_ns.shm_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_SHMALL,
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof (init_ipc_ns.shm_ctlall),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_SHMMNI,
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof (init_ipc_ns.shm_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_MSGMAX,
.procname = "msgmax",
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof (init_ipc_ns.msg_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_MSGMNI,
.procname = "msgmni",
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof (init_ipc_ns.msg_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_callback_dointvec,
- .strategy = sysctl_ipc_registered_data,
},
{
- .ctl_name = KERN_MSGMNB,
.procname = "msgmnb",
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof (init_ipc_ns.msg_ctlmnb),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = KERN_SEM,
.procname = "sem",
.data = &init_ipc_ns.sem_ctls,
.maxlen = 4*sizeof (int),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
- .strategy = sysctl_ipc_data,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "auto_msgmni",
.data = &init_ipc_ns.auto_msgmni,
.maxlen = sizeof(int),
@@ -272,7 +196,6 @@ static struct ctl_table ipc_kern_table[] = {
static struct ctl_table ipc_root_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = ipc_kern_table,
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 8a058711fc10..0c09366b96f3 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -88,7 +88,7 @@ static ctl_table mq_sysctls[] = {
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
- { .ctl_name = 0 }
+ {}
};
static ctl_table mq_sysctl_dir[] = {
@@ -97,17 +97,16 @@ static ctl_table mq_sysctl_dir[] = {
.mode = 0555,
.child = mq_sysctls,
},
- { .ctl_name = 0 }
+ {}
};
static ctl_table mq_sysctl_root[] = {
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = mq_sysctl_dir,
},
- { .ctl_name = 0 }
+ {}
};
struct ctl_table_header *mq_register_sysctl_table(void)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ee9d69707c0a..e4e3f04803ca 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -32,7 +32,6 @@
#include <linux/nsproxy.h>
#include <linux/pid.h>
#include <linux/ipc_namespace.h>
-#include <linux/ima.h>
#include <net/sock.h>
#include "util.h"
@@ -135,7 +134,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
init_waitqueue_head(&info->wait_q);
INIT_LIST_HEAD(&info->e_wait_q[0].list);
INIT_LIST_HEAD(&info->e_wait_q[1].list);
- info->messages = NULL;
info->notify_owner = NULL;
info->qsize = 0;
info->user = NULL; /* set when all is ok */
@@ -147,26 +145,24 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
info->attr.mq_msgsize = attr->mq_msgsize;
}
mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+ info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+ if (!info->messages)
+ goto out_inode;
+
mq_bytes = (mq_msg_tblsz +
(info->attr.mq_maxmsg * info->attr.mq_msgsize));
spin_lock(&mq_lock);
if (u->mq_bytes + mq_bytes < u->mq_bytes ||
u->mq_bytes + mq_bytes >
- p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
+ task_rlimit(p, RLIMIT_MSGQUEUE)) {
spin_unlock(&mq_lock);
+ kfree(info->messages);
goto out_inode;
}
u->mq_bytes += mq_bytes;
spin_unlock(&mq_lock);
- info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
- if (!info->messages) {
- spin_lock(&mq_lock);
- u->mq_bytes -= mq_bytes;
- spin_unlock(&mq_lock);
- goto out_inode;
- }
/* all is ok */
info->user = get_uid(u);
} else if (S_ISDIR(mode)) {
@@ -188,7 +184,7 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct ipc_namespace *ns = data;
- int error = 0;
+ int error;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -206,7 +202,9 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
if (!sb->s_root) {
iput(inode);
error = -ENOMEM;
+ goto out;
}
+ error = 0;
out:
return error;
@@ -265,8 +263,9 @@ static void mqueue_delete_inode(struct inode *inode)
clear_inode(inode);
- mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
- (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+ /* Total amount of bytes accounted for the mqueue */
+ mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *)
+ + info->attr.mq_msgsize);
user = info->user;
if (user) {
spin_lock(&mq_lock);
@@ -605,8 +604,8 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
/* check for overflow */
if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
return 0;
- if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
- (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
+ if ((unsigned long)(attr->mq_maxmsg * (attr->mq_msgsize
+ + sizeof (struct msg_msg *))) <
(unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
return 0;
return 1;
@@ -624,9 +623,10 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
int ret;
if (attr) {
- ret = -EINVAL;
- if (!mq_attr_ok(ipc_ns, attr))
+ if (!mq_attr_ok(ipc_ns, attr)) {
+ ret = -EINVAL;
goto out;
+ }
/* store for use during create */
dentry->d_fsdata = attr;
}
@@ -660,24 +660,28 @@ out:
static struct file *do_open(struct ipc_namespace *ipc_ns,
struct dentry *dentry, int oflag)
{
+ int ret;
const struct cred *cred = current_cred();
static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE };
if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
- return ERR_PTR(-EINVAL);
+ ret = -EINVAL;
+ goto err;
}
if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
- return ERR_PTR(-EACCES);
+ ret = -EACCES;
+ goto err;
}
return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
+
+err:
+ dput(dentry);
+ mntput(ipc_ns->mq_mnt);
+ return ERR_PTR(ret);
}
SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
@@ -706,16 +710,17 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
- goto out_err;
+ goto out_putfd;
}
mntget(ipc_ns->mq_mnt);
if (oflag & O_CREAT) {
if (dentry->d_inode) { /* entry already exists */
audit_inode(name, dentry);
- error = -EEXIST;
- if (oflag & O_EXCL)
+ if (oflag & O_EXCL) {
+ error = -EEXIST;
goto out;
+ }
filp = do_open(ipc_ns, dentry, oflag);
} else {
filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
@@ -723,9 +728,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
u_attr ? &attr : NULL);
}
} else {
- error = -ENOENT;
- if (!dentry->d_inode)
+ if (!dentry->d_inode) {
+ error = -ENOENT;
goto out;
+ }
audit_inode(name, dentry);
filp = do_open(ipc_ns, dentry, oflag);
}
@@ -734,7 +740,6 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
error = PTR_ERR(filp);
goto out_putfd;
}
- ima_counts_get(filp);
fd_install(fd, filp);
goto out_upsem;
@@ -744,7 +749,6 @@ out:
mntput(ipc_ns->mq_mnt);
out_putfd:
put_unused_fd(fd);
-out_err:
fd = error;
out_upsem:
mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
@@ -874,19 +878,24 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
timeout = prepare_timeout(p);
- ret = -EBADF;
filp = fget(mqdes);
- if (unlikely(!filp))
+ if (unlikely(!filp)) {
+ ret = -EBADF;
goto out;
+ }
inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);
audit_inode(NULL, filp->f_path.dentry);
- if (unlikely(!(filp->f_mode & FMODE_WRITE)))
+ if (unlikely(!(filp->f_mode & FMODE_WRITE))) {
+ ret = -EBADF;
goto out_fput;
+ }
if (unlikely(msg_len > info->attr.mq_msgsize)) {
ret = -EMSGSIZE;
@@ -963,19 +972,24 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
audit_mq_sendrecv(mqdes, msg_len, 0, p);
timeout = prepare_timeout(p);
- ret = -EBADF;
filp = fget(mqdes);
- if (unlikely(!filp))
+ if (unlikely(!filp)) {
+ ret = -EBADF;
goto out;
+ }
inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);
audit_inode(NULL, filp->f_path.dentry);
- if (unlikely(!(filp->f_mode & FMODE_READ)))
+ if (unlikely(!(filp->f_mode & FMODE_READ))) {
+ ret = -EBADF;
goto out_fput;
+ }
/* checks if buffer is big enough */
if (unlikely(msg_len < info->attr.mq_msgsize)) {
@@ -1065,13 +1079,14 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
/* create the notify skb */
nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
- ret = -ENOMEM;
- if (!nc)
+ if (!nc) {
+ ret = -ENOMEM;
goto out;
- ret = -EFAULT;
+ }
if (copy_from_user(nc->data,
notification.sigev_value.sival_ptr,
NOTIFY_COOKIE_LEN)) {
+ ret = -EFAULT;
goto out;
}
@@ -1080,9 +1095,10 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
/* and attach it to the socket */
retry:
filp = fget(notification.sigev_signo);
- ret = -EBADF;
- if (!filp)
+ if (!filp) {
+ ret = -EBADF;
goto out;
+ }
sock = netlink_getsockbyfilp(filp);
fput(filp);
if (IS_ERR(sock)) {
@@ -1094,7 +1110,7 @@ retry:
timeo = MAX_SCHEDULE_TIMEOUT;
ret = netlink_attachskb(sock, nc, &timeo, NULL);
if (ret == 1)
- goto retry;
+ goto retry;
if (ret) {
sock = NULL;
nc = NULL;
@@ -1103,14 +1119,17 @@ retry:
}
}
- ret = -EBADF;
filp = fget(mqdes);
- if (!filp)
+ if (!filp) {
+ ret = -EBADF;
goto out;
+ }
inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);
ret = 0;
@@ -1173,14 +1192,17 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
return -EINVAL;
}
- ret = -EBADF;
filp = fget(mqdes);
- if (!filp)
+ if (!filp) {
+ ret = -EBADF;
goto out;
+ }
inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);
spin_lock(&info->lock);
@@ -1274,7 +1296,7 @@ static int __init init_mqueue_fs(void)
if (mqueue_inode_cachep == NULL)
return -ENOMEM;
- /* ignore failues - they are not fatal */
+ /* ignore failures - they are not fatal */
mq_sysctl_table = mq_register_sysctl_table();
error = register_filesystem(&mqueue_fs_type);
diff --git a/ipc/msg.c b/ipc/msg.c
index 2ceab7f12fcb..af42ef8900a6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -125,6 +125,7 @@ void msg_init_ns(struct ipc_namespace *ns)
void msg_exit_ns(struct ipc_namespace *ns)
{
free_ipcs(ns, &msg_ids(ns), freeque);
+ idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
}
#endif
@@ -412,7 +413,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
struct msqid_ds __user *buf, int version)
{
struct kern_ipc_perm *ipcp;
- struct msqid64_ds msqid64;
+ struct msqid64_ds uninitialized_var(msqid64);
struct msg_queue *msq;
int err;
diff --git a/ipc/sem.c b/ipc/sem.c
index 87c2b641fd7b..dbef95b15941 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -129,6 +129,7 @@ void sem_init_ns(struct ipc_namespace *ns)
void sem_exit_ns(struct ipc_namespace *ns)
{
free_ipcs(ns, &sem_ids(ns), freeary);
+ idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
}
#endif
@@ -240,6 +241,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
key_t key = params->key;
int nsems = params->u.nsems;
int semflg = params->flg;
+ int i;
if (!nsems)
return -EINVAL;
@@ -272,6 +274,11 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
ns->used_sems += nsems;
sma->sem_base = (struct sem *) &sma[1];
+
+ for (i = 0; i < nsems; i++)
+ INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+
+ sma->complex_count = 0;
INIT_LIST_HEAD(&sma->sem_pending);
INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
@@ -397,63 +404,109 @@ undo:
return result;
}
-/* Go through the pending queue for the indicated semaphore
- * looking for tasks that can be completed.
+/*
+ * Wake up a process waiting on the sem queue with a given error.
+ * The queue is invalid (may not be accessed) after the function returns.
*/
-static void update_queue (struct sem_array * sma)
+static void wake_up_sem_queue(struct sem_queue *q, int error)
{
- int error;
- struct sem_queue * q;
+ /*
+ * Hold preempt off so that we don't get preempted and have the
+ * wakee busy-wait until we're scheduled back on. We're holding
+ * locks here so it may not strictly be needed, however if the
+ * locks become preemptible then this prevents such a problem.
+ */
+ preempt_disable();
+ q->status = IN_WAKEUP;
+ wake_up_process(q->sleeper);
+ /* hands-off: q can disappear immediately after writing q->status. */
+ smp_wmb();
+ q->status = error;
+ preempt_enable();
+}
+
+static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
+{
+ list_del(&q->list);
+ if (q->nsops == 1)
+ list_del(&q->simple_list);
+ else
+ sma->complex_count--;
+}
+
+
+/**
+ * update_queue(sma, semnum): Look for tasks that can be completed.
+ * @sma: semaphore array.
+ * @semnum: semaphore that was modified.
+ *
+ * update_queue must be called after a semaphore in a semaphore array
+ * was modified. If multiple semaphore were modified, then @semnum
+ * must be set to -1.
+ */
+static void update_queue(struct sem_array *sma, int semnum)
+{
+ struct sem_queue *q;
+ struct list_head *walk;
+ struct list_head *pending_list;
+ int offset;
+
+ /* if there are complex operations around, then knowing the semaphore
+ * that was modified doesn't help us. Assume that multiple semaphores
+ * were modified.
+ */
+ if (sma->complex_count)
+ semnum = -1;
+
+ if (semnum == -1) {
+ pending_list = &sma->sem_pending;
+ offset = offsetof(struct sem_queue, list);
+ } else {
+ pending_list = &sma->sem_base[semnum].sem_pending;
+ offset = offsetof(struct sem_queue, simple_list);
+ }
+
+again:
+ walk = pending_list->next;
+ while (walk != pending_list) {
+ int error, alter;
+
+ q = (struct sem_queue *)((char *)walk - offset);
+ walk = walk->next;
+
+ /* If we are scanning the single sop, per-semaphore list of
+ * one semaphore and that semaphore is 0, then it is not
+ * necessary to scan the "alter" entries: simple increments
+ * that affect only one entry succeed immediately and cannot
+ * be in the per semaphore pending queue, and decrements
+ * cannot be successful if the value is already 0.
+ */
+ if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
+ q->alter)
+ break;
- q = list_entry(sma->sem_pending.next, struct sem_queue, list);
- while (&q->list != &sma->sem_pending) {
error = try_atomic_semop(sma, q->sops, q->nsops,
q->undo, q->pid);
/* Does q->sleeper still need to sleep? */
- if (error <= 0) {
- struct sem_queue *n;
-
- /*
- * Continue scanning. The next operation
- * that must be checked depends on the type of the
- * completed operation:
- * - if the operation modified the array, then
- * restart from the head of the queue and
- * check for threads that might be waiting
- * for semaphore values to become 0.
- * - if the operation didn't modify the array,
- * then just continue.
- * The order of list_del() and reading ->next
- * is crucial: In the former case, the list_del()
- * must be done first [because we might be the
- * first entry in ->sem_pending], in the latter
- * case the list_del() must be done last
- * [because the list is invalid after the list_del()]
- */
- if (q->alter) {
- list_del(&q->list);
- n = list_entry(sma->sem_pending.next,
- struct sem_queue, list);
- } else {
- n = list_entry(q->list.next, struct sem_queue,
- list);
- list_del(&q->list);
- }
-
- /* wake up the waiting thread */
- q->status = IN_WAKEUP;
+ if (error > 0)
+ continue;
- wake_up_process(q->sleeper);
- /* hands-off: q will disappear immediately after
- * writing q->status.
- */
- smp_wmb();
- q->status = error;
- q = n;
- } else {
- q = list_entry(q->list.next, struct sem_queue, list);
- }
+ unlink_queue(sma, q);
+
+ /*
+ * The next operation that must be checked depends on the type
+ * of the completed operation:
+ * - if the operation modified the array, then restart from the
+ * head of the queue and check for threads that might be
+ * waiting for the new semaphore values.
+ * - if the operation didn't modify the array, then just
+ * continue.
+ */
+ alter = q->alter;
+ wake_up_sem_queue(q, error);
+ if (alter && !error)
+ goto again;
}
}
@@ -533,12 +586,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
/* Wake up all pending processes and let them fail with EIDRM. */
list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
- list_del(&q->list);
-
- q->status = IN_WAKEUP;
- wake_up_process(q->sleeper); /* doesn't sleep */
- smp_wmb();
- q->status = -EIDRM; /* hands-off q */
+ unlink_queue(sma, q);
+ wake_up_sem_queue(q, -EIDRM);
}
/* Remove the semaphore set from the IDR */
@@ -575,7 +624,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
static int semctl_nolock(struct ipc_namespace *ns, int semid,
int cmd, int version, union semun arg)
{
- int err = -EINVAL;
+ int err;
struct sem_array *sma;
switch(cmd) {
@@ -652,7 +701,6 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
default:
return -EINVAL;
}
- return err;
out_unlock:
sem_unlock(sma);
return err;
@@ -759,7 +807,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
}
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ update_queue(sma, -1);
err = 0;
goto out_unlock;
}
@@ -801,7 +849,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
curr->sempid = task_tgid_vnr(current);
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ update_queue(sma, semnum);
err = 0;
goto out_unlock;
}
@@ -961,17 +1009,31 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
return 0;
}
-static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
+static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
{
- struct sem_undo *walk;
+ struct sem_undo *un;
- list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
- if (walk->semid == semid)
- return walk;
+ list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
+ if (un->semid == semid)
+ return un;
}
return NULL;
}
+static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
+{
+ struct sem_undo *un;
+
+ assert_spin_locked(&ulp->lock);
+
+ un = __lookup_undo(ulp, semid);
+ if (un) {
+ list_del_rcu(&un->list_proc);
+ list_add_rcu(&un->list_proc, &ulp->list_proc);
+ }
+ return un;
+}
+
/**
* find_alloc_undo - Lookup (and if not present create) undo array
* @ns: namespace
@@ -1163,7 +1225,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
if (error <= 0) {
if (alter && error == 0)
- update_queue (sma);
+ update_queue(sma, (nsops == 1) ? sops[0].sem_num : -1);
+
goto out_unlock_free;
}
@@ -1181,6 +1244,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
else
list_add(&queue.list, &sma->sem_pending);
+ if (nsops == 1) {
+ struct sem *curr;
+ curr = &sma->sem_base[sops->sem_num];
+
+ if (alter)
+ list_add_tail(&queue.simple_list, &curr->sem_pending);
+ else
+ list_add(&queue.simple_list, &curr->sem_pending);
+ } else {
+ INIT_LIST_HEAD(&queue.simple_list);
+ sma->complex_count++;
+ }
+
queue.status = -EINTR;
queue.sleeper = current;
current->state = TASK_INTERRUPTIBLE;
@@ -1222,7 +1298,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
- list_del(&queue.list);
+ unlink_queue(sma, &queue);
out_unlock_free:
sem_unlock(sma);
@@ -1307,7 +1383,7 @@ void exit_sem(struct task_struct *tsk)
if (IS_ERR(sma))
continue;
- un = lookup_undo(ulp, semid);
+ un = __lookup_undo(ulp, semid);
if (un == NULL) {
/* exit_sem raced with IPC_RMID+semget() that created
* exactly the same semid. Nothing to do.
@@ -1351,7 +1427,7 @@ void exit_sem(struct task_struct *tsk)
}
sma->sem_otime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ update_queue(sma, -1);
sem_unlock(sma);
call_rcu(&un->rcu, free_un);
@@ -1365,7 +1441,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
struct sem_array *sma = it;
return seq_printf(s,
- "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
+ "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
sma->sem_perm.key,
sma->sem_perm.id,
sma->sem_perm.mode,
diff --git a/ipc/shm.c b/ipc/shm.c
index 464694e0aa4a..1a314c89f93c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -39,7 +39,6 @@
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>
-#include <linux/ima.h>
#include <asm/uaccess.h>
@@ -101,6 +100,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
void shm_exit_ns(struct ipc_namespace *ns)
{
free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
+ idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
}
#endif
@@ -290,28 +290,31 @@ static unsigned long shm_get_unmapped_area(struct file *file,
unsigned long flags)
{
struct shm_file_data *sfd = shm_file_data(file);
- return get_unmapped_area(sfd->file, addr, len, pgoff, flags);
-}
-
-int is_file_shm_hugepages(struct file *file)
-{
- int ret = 0;
-
- if (file->f_op == &shm_file_operations) {
- struct shm_file_data *sfd;
- sfd = shm_file_data(file);
- ret = is_file_hugepages(sfd->file);
- }
- return ret;
+ return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
+ pgoff, flags);
}
static const struct file_operations shm_file_operations = {
.mmap = shm_mmap,
.fsync = shm_fsync,
.release = shm_release,
+#ifndef CONFIG_MMU
+ .get_unmapped_area = shm_get_unmapped_area,
+#endif
+};
+
+static const struct file_operations shm_file_operations_huge = {
+ .mmap = shm_mmap,
+ .fsync = shm_fsync,
+ .release = shm_release,
.get_unmapped_area = shm_get_unmapped_area,
};
+int is_file_shm_hugepages(struct file *file)
+{
+ return file->f_op == &shm_file_operations_huge;
+}
+
static const struct vm_operations_struct shm_vm_ops = {
.open = shm_open, /* callback for a new vm-area open */
.close = shm_close, /* callback for when the vm-area is released */
@@ -761,8 +764,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
if (euid != shp->shm_perm.uid &&
euid != shp->shm_perm.cuid)
goto out_unlock;
- if (cmd == SHM_LOCK &&
- !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+ if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
goto out_unlock;
}
@@ -878,8 +880,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
if (err)
goto out_unlock;
- path.dentry = dget(shp->shm_file->f_path.dentry);
- path.mnt = shp->shm_file->f_path.mnt;
+ path = shp->shm_file->f_path;
+ path_get(&path);
shp->shm_nattch++;
size = i_size_read(path.dentry->d_inode);
shm_unlock(shp);
@@ -889,10 +891,12 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
if (!sfd)
goto out_put_dentry;
- file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
+ file = alloc_file(&path, f_mode,
+ is_file_hugepages(shp->shm_file) ?
+ &shm_file_operations_huge :
+ &shm_file_operations);
if (!file)
goto out_free;
- ima_counts_get(file);
file->private_data = sfd;
file->f_mapping = shp->shm_file->f_mapping;
@@ -947,7 +951,7 @@ out_unlock:
out_free:
kfree(sfd);
out_put_dentry:
- dput(path.dentry);
+ path_put(&path);
goto out_nattch;
}
diff --git a/ipc/syscall.c b/ipc/syscall.c
new file mode 100644
index 000000000000..355a3da9ec73
--- /dev/null
+++ b/ipc/syscall.c
@@ -0,0 +1,99 @@
+/*
+ * sys_ipc() is the old de-multiplexer for the SysV IPC calls.
+ *
+ * This is really horribly ugly, and new architectures should just wire up
+ * the individual syscalls instead.
+ */
+#include <linux/unistd.h>
+
+#ifdef __ARCH_WANT_SYS_IPC
+#include <linux/errno.h>
+#include <linux/ipc.h>
+#include <linux/shm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, int, second,
+ unsigned long, third, void __user *, ptr, long, fifth)
+{
+ int version, ret;
+
+ version = call >> 16; /* hack for backward compatibility */
+ call &= 0xffff;
+
+ switch (call) {
+ case SEMOP:
+ return sys_semtimedop(first, (struct sembuf __user *)ptr,
+ second, NULL);
+ case SEMTIMEDOP:
+ return sys_semtimedop(first, (struct sembuf __user *)ptr,
+ second,
+ (const struct timespec __user *)fifth);
+
+ case SEMGET:
+ return sys_semget(first, second, third);
+ case SEMCTL: {
+ union semun fourth;
+ if (!ptr)
+ return -EINVAL;
+ if (get_user(fourth.__pad, (void __user * __user *) ptr))
+ return -EFAULT;
+ return sys_semctl(first, second, third, fourth);
+ }
+
+ case MSGSND:
+ return sys_msgsnd(first, (struct msgbuf __user *) ptr,
+ second, third);
+ case MSGRCV:
+ switch (version) {
+ case 0: {
+ struct ipc_kludge tmp;
+ if (!ptr)
+ return -EINVAL;
+
+ if (copy_from_user(&tmp,
+ (struct ipc_kludge __user *) ptr,
+ sizeof(tmp)))
+ return -EFAULT;
+ return sys_msgrcv(first, tmp.msgp, second,
+ tmp.msgtyp, third);
+ }
+ default:
+ return sys_msgrcv(first,
+ (struct msgbuf __user *) ptr,
+ second, fifth, third);
+ }
+ case MSGGET:
+ return sys_msgget((key_t) first, second);
+ case MSGCTL:
+ return sys_msgctl(first, second, (struct msqid_ds __user *)ptr);
+
+ case SHMAT:
+ switch (version) {
+ default: {
+ unsigned long raddr;
+ ret = do_shmat(first, (char __user *)ptr,
+ second, &raddr);
+ if (ret)
+ return ret;
+ return put_user(raddr, (unsigned long __user *) third);
+ }
+ case 1:
+ /*
+ * This was the entry point for kernel-originating calls
+ * from iBCS2 in 2.2 days.
+ */
+ return -EINVAL;
+ }
+ case SHMDT:
+ return sys_shmdt((char __user *)ptr);
+ case SHMGET:
+ return sys_shmget(first, second, third);
+ case SHMCTL:
+ return sys_shmctl(first, second,
+ (struct shmid_ds __user *) ptr);
+ default:
+ return -ENOSYS;
+ }
+}
+#endif