diff options
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/Makefile | 2 | ||||
-rw-r--r-- | ipc/ipc_sysctl.c | 77 | ||||
-rw-r--r-- | ipc/mq_sysctl.c | 7 | ||||
-rw-r--r-- | ipc/mqueue.c | 124 | ||||
-rw-r--r-- | ipc/msg.c | 3 | ||||
-rw-r--r-- | ipc/sem.c | 214 | ||||
-rw-r--r-- | ipc/shm.c | 46 | ||||
-rw-r--r-- | ipc/syscall.c | 99 |
8 files changed, 348 insertions, 224 deletions
diff --git a/ipc/Makefile b/ipc/Makefile index 4e1955ea815d..9075e172e52c 100644 --- a/ipc/Makefile +++ b/ipc/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o -obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o +obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o syscall.o obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o obj_mq-$(CONFIG_COMPAT) += compat_mq.o obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 7d3704750efc..56410faa4550 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -129,136 +129,60 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #define proc_ipcauto_dointvec_minmax NULL #endif -#ifdef CONFIG_SYSCTL_SYSCALL -/* The generic sysctl ipc data routine. */ -static int sysctl_ipc_data(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - size_t len; - void *data; - - /* Get out of I don't have a variable */ - if (!table->data || !table->maxlen) - return -ENOTDIR; - - data = get_ipc(table); - if (!data) - return -ENOTDIR; - - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, data, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - if (newval && newlen) { - if (newlen > table->maxlen) - newlen = table->maxlen; - - if (copy_from_user(data, newval, newlen)) - return -EFAULT; - } - return 1; -} - -static int sysctl_ipc_registered_data(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int rc; - - rc = sysctl_ipc_data(table, oldval, oldlenp, newval, newlen); - - if (newval && newlen && rc > 0) - /* - * Tunable has successfully been changed from userland - */ - unregister_ipcns_notifier(current->nsproxy->ipc_ns); - - return rc; -} -#else -#define sysctl_ipc_data NULL -#define sysctl_ipc_registered_data NULL -#endif - static int zero; static int one = 1; static struct ctl_table ipc_kern_table[] = { { - .ctl_name = KERN_SHMMAX, .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, .maxlen = sizeof (init_ipc_ns.shm_ctlmax), .mode = 0644, .proc_handler = proc_ipc_doulongvec_minmax, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_SHMALL, .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, .maxlen = sizeof (init_ipc_ns.shm_ctlall), .mode = 0644, .proc_handler = proc_ipc_doulongvec_minmax, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_SHMMNI, .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, .maxlen = sizeof (init_ipc_ns.shm_ctlmni), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_MSGMAX, .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_MSGMNI, .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, .proc_handler = proc_ipc_callback_dointvec, - .strategy = sysctl_ipc_registered_data, }, { - .ctl_name = KERN_MSGMNB, .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_SEM, .procname = "sem", .data = &init_ipc_ns.sem_ctls, .maxlen = 4*sizeof (int), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "auto_msgmni", .data = &init_ipc_ns.auto_msgmni, .maxlen = sizeof(int), @@ -272,7 +196,6 @@ static struct ctl_table ipc_kern_table[] = { static struct ctl_table ipc_root_table[] = { { - .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = ipc_kern_table, diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 8a058711fc10..0c09366b96f3 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -88,7 +88,7 @@ static ctl_table mq_sysctls[] = { .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, }, - { .ctl_name = 0 } + {} }; static ctl_table mq_sysctl_dir[] = { @@ -97,17 +97,16 @@ static ctl_table mq_sysctl_dir[] = { .mode = 0555, .child = mq_sysctls, }, - { .ctl_name = 0 } + {} }; static ctl_table mq_sysctl_root[] = { { - .ctl_name = CTL_FS, .procname = "fs", .mode = 0555, .child = mq_sysctl_dir, }, - { .ctl_name = 0 } + {} }; struct ctl_table_header *mq_register_sysctl_table(void) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ee9d69707c0a..e4e3f04803ca 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -32,7 +32,6 @@ #include <linux/nsproxy.h> #include <linux/pid.h> #include <linux/ipc_namespace.h> -#include <linux/ima.h> #include <net/sock.h> #include "util.h" @@ -135,7 +134,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, init_waitqueue_head(&info->wait_q); INIT_LIST_HEAD(&info->e_wait_q[0].list); INIT_LIST_HEAD(&info->e_wait_q[1].list); - info->messages = NULL; info->notify_owner = NULL; info->qsize = 0; info->user = NULL; /* set when all is ok */ @@ -147,26 +145,24 @@ static struct inode *mqueue_get_inode(struct super_block *sb, info->attr.mq_msgsize = attr->mq_msgsize; } mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); + info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); + if (!info->messages) + goto out_inode; + mq_bytes = (mq_msg_tblsz + (info->attr.mq_maxmsg * info->attr.mq_msgsize)); spin_lock(&mq_lock); if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > - p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) { + task_rlimit(p, RLIMIT_MSGQUEUE)) { spin_unlock(&mq_lock); + kfree(info->messages); goto out_inode; } u->mq_bytes += mq_bytes; spin_unlock(&mq_lock); - info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); - if (!info->messages) { - spin_lock(&mq_lock); - u->mq_bytes -= mq_bytes; - spin_unlock(&mq_lock); - goto out_inode; - } /* all is ok */ info->user = get_uid(u); } else if (S_ISDIR(mode)) { @@ -188,7 +184,7 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; struct ipc_namespace *ns = data; - int error = 0; + int error; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -206,7 +202,9 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) { iput(inode); error = -ENOMEM; + goto out; } + error = 0; out: return error; @@ -265,8 +263,9 @@ static void mqueue_delete_inode(struct inode *inode) clear_inode(inode); - mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) + - (info->attr.mq_maxmsg * info->attr.mq_msgsize)); + /* Total amount of bytes accounted for the mqueue */ + mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *) + + info->attr.mq_msgsize); user = info->user; if (user) { spin_lock(&mq_lock); @@ -605,8 +604,8 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) /* check for overflow */ if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg) return 0; - if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) + - (attr->mq_maxmsg * sizeof (struct msg_msg *)) < + if ((unsigned long)(attr->mq_maxmsg * (attr->mq_msgsize + + sizeof (struct msg_msg *))) < (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize)) return 0; return 1; @@ -624,9 +623,10 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, int ret; if (attr) { - ret = -EINVAL; - if (!mq_attr_ok(ipc_ns, attr)) + if (!mq_attr_ok(ipc_ns, attr)) { + ret = -EINVAL; goto out; + } /* store for use during create */ dentry->d_fsdata = attr; } @@ -660,24 +660,28 @@ out: static struct file *do_open(struct ipc_namespace *ipc_ns, struct dentry *dentry, int oflag) { + int ret; const struct cred *cred = current_cred(); static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) { - dput(dentry); - mntput(ipc_ns->mq_mnt); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto err; } if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { - dput(dentry); - mntput(ipc_ns->mq_mnt); - return ERR_PTR(-EACCES); + ret = -EACCES; + goto err; } return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred); + +err: + dput(dentry); + mntput(ipc_ns->mq_mnt); + return ERR_PTR(ret); } SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, @@ -706,16 +710,17 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); - goto out_err; + goto out_putfd; } mntget(ipc_ns->mq_mnt); if (oflag & O_CREAT) { if (dentry->d_inode) { /* entry already exists */ audit_inode(name, dentry); - error = -EEXIST; - if (oflag & O_EXCL) + if (oflag & O_EXCL) { + error = -EEXIST; goto out; + } filp = do_open(ipc_ns, dentry, oflag); } else { filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root, @@ -723,9 +728,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, u_attr ? &attr : NULL); } } else { - error = -ENOENT; - if (!dentry->d_inode) + if (!dentry->d_inode) { + error = -ENOENT; goto out; + } audit_inode(name, dentry); filp = do_open(ipc_ns, dentry, oflag); } @@ -734,7 +740,6 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, error = PTR_ERR(filp); goto out_putfd; } - ima_counts_get(filp); fd_install(fd, filp); goto out_upsem; @@ -744,7 +749,6 @@ out: mntput(ipc_ns->mq_mnt); out_putfd: put_unused_fd(fd); -out_err: fd = error; out_upsem: mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); @@ -874,19 +878,24 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, msg_prio, p); timeout = prepare_timeout(p); - ret = -EBADF; filp = fget(mqdes); - if (unlikely(!filp)) + if (unlikely(!filp)) { + ret = -EBADF; goto out; + } inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) + if (unlikely(filp->f_op != &mqueue_file_operations)) { + ret = -EBADF; goto out_fput; + } info = MQUEUE_I(inode); audit_inode(NULL, filp->f_path.dentry); - if (unlikely(!(filp->f_mode & FMODE_WRITE))) + if (unlikely(!(filp->f_mode & FMODE_WRITE))) { + ret = -EBADF; goto out_fput; + } if (unlikely(msg_len > info->attr.mq_msgsize)) { ret = -EMSGSIZE; @@ -963,19 +972,24 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, 0, p); timeout = prepare_timeout(p); - ret = -EBADF; filp = fget(mqdes); - if (unlikely(!filp)) + if (unlikely(!filp)) { + ret = -EBADF; goto out; + } inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) + if (unlikely(filp->f_op != &mqueue_file_operations)) { + ret = -EBADF; goto out_fput; + } info = MQUEUE_I(inode); audit_inode(NULL, filp->f_path.dentry); - if (unlikely(!(filp->f_mode & FMODE_READ))) + if (unlikely(!(filp->f_mode & FMODE_READ))) { + ret = -EBADF; goto out_fput; + } /* checks if buffer is big enough */ if (unlikely(msg_len < info->attr.mq_msgsize)) { @@ -1065,13 +1079,14 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, /* create the notify skb */ nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL); - ret = -ENOMEM; - if (!nc) + if (!nc) { + ret = -ENOMEM; goto out; - ret = -EFAULT; + } if (copy_from_user(nc->data, notification.sigev_value.sival_ptr, NOTIFY_COOKIE_LEN)) { + ret = -EFAULT; goto out; } @@ -1080,9 +1095,10 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, /* and attach it to the socket */ retry: filp = fget(notification.sigev_signo); - ret = -EBADF; - if (!filp) + if (!filp) { + ret = -EBADF; goto out; + } sock = netlink_getsockbyfilp(filp); fput(filp); if (IS_ERR(sock)) { @@ -1094,7 +1110,7 @@ retry: timeo = MAX_SCHEDULE_TIMEOUT; ret = netlink_attachskb(sock, nc, &timeo, NULL); if (ret == 1) - goto retry; + goto retry; if (ret) { sock = NULL; nc = NULL; @@ -1103,14 +1119,17 @@ retry: } } - ret = -EBADF; filp = fget(mqdes); - if (!filp) + if (!filp) { + ret = -EBADF; goto out; + } inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) + if (unlikely(filp->f_op != &mqueue_file_operations)) { + ret = -EBADF; goto out_fput; + } info = MQUEUE_I(inode); ret = 0; @@ -1173,14 +1192,17 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, return -EINVAL; } - ret = -EBADF; filp = fget(mqdes); - if (!filp) + if (!filp) { + ret = -EBADF; goto out; + } inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) + if (unlikely(filp->f_op != &mqueue_file_operations)) { + ret = -EBADF; goto out_fput; + } info = MQUEUE_I(inode); spin_lock(&info->lock); @@ -1274,7 +1296,7 @@ static int __init init_mqueue_fs(void) if (mqueue_inode_cachep == NULL) return -ENOMEM; - /* ignore failues - they are not fatal */ + /* ignore failures - they are not fatal */ mq_sysctl_table = mq_register_sysctl_table(); error = register_filesystem(&mqueue_fs_type); diff --git a/ipc/msg.c b/ipc/msg.c index 2ceab7f12fcb..af42ef8900a6 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -125,6 +125,7 @@ void msg_init_ns(struct ipc_namespace *ns) void msg_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &msg_ids(ns), freeque); + idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); } #endif @@ -412,7 +413,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, struct msqid_ds __user *buf, int version) { struct kern_ipc_perm *ipcp; - struct msqid64_ds msqid64; + struct msqid64_ds uninitialized_var(msqid64); struct msg_queue *msq; int err; diff --git a/ipc/sem.c b/ipc/sem.c index 87c2b641fd7b..dbef95b15941 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -129,6 +129,7 @@ void sem_init_ns(struct ipc_namespace *ns) void sem_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &sem_ids(ns), freeary); + idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); } #endif @@ -240,6 +241,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) key_t key = params->key; int nsems = params->u.nsems; int semflg = params->flg; + int i; if (!nsems) return -EINVAL; @@ -272,6 +274,11 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) ns->used_sems += nsems; sma->sem_base = (struct sem *) &sma[1]; + + for (i = 0; i < nsems; i++) + INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); + + sma->complex_count = 0; INIT_LIST_HEAD(&sma->sem_pending); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; @@ -397,63 +404,109 @@ undo: return result; } -/* Go through the pending queue for the indicated semaphore - * looking for tasks that can be completed. +/* + * Wake up a process waiting on the sem queue with a given error. + * The queue is invalid (may not be accessed) after the function returns. */ -static void update_queue (struct sem_array * sma) +static void wake_up_sem_queue(struct sem_queue *q, int error) { - int error; - struct sem_queue * q; + /* + * Hold preempt off so that we don't get preempted and have the + * wakee busy-wait until we're scheduled back on. We're holding + * locks here so it may not strictly be needed, however if the + * locks become preemptible then this prevents such a problem. + */ + preempt_disable(); + q->status = IN_WAKEUP; + wake_up_process(q->sleeper); + /* hands-off: q can disappear immediately after writing q->status. */ + smp_wmb(); + q->status = error; + preempt_enable(); +} + +static void unlink_queue(struct sem_array *sma, struct sem_queue *q) +{ + list_del(&q->list); + if (q->nsops == 1) + list_del(&q->simple_list); + else + sma->complex_count--; +} + + +/** + * update_queue(sma, semnum): Look for tasks that can be completed. + * @sma: semaphore array. + * @semnum: semaphore that was modified. + * + * update_queue must be called after a semaphore in a semaphore array + * was modified. If multiple semaphore were modified, then @semnum + * must be set to -1. + */ +static void update_queue(struct sem_array *sma, int semnum) +{ + struct sem_queue *q; + struct list_head *walk; + struct list_head *pending_list; + int offset; + + /* if there are complex operations around, then knowing the semaphore + * that was modified doesn't help us. Assume that multiple semaphores + * were modified. + */ + if (sma->complex_count) + semnum = -1; + + if (semnum == -1) { + pending_list = &sma->sem_pending; + offset = offsetof(struct sem_queue, list); + } else { + pending_list = &sma->sem_base[semnum].sem_pending; + offset = offsetof(struct sem_queue, simple_list); + } + +again: + walk = pending_list->next; + while (walk != pending_list) { + int error, alter; + + q = (struct sem_queue *)((char *)walk - offset); + walk = walk->next; + + /* If we are scanning the single sop, per-semaphore list of + * one semaphore and that semaphore is 0, then it is not + * necessary to scan the "alter" entries: simple increments + * that affect only one entry succeed immediately and cannot + * be in the per semaphore pending queue, and decrements + * cannot be successful if the value is already 0. + */ + if (semnum != -1 && sma->sem_base[semnum].semval == 0 && + q->alter) + break; - q = list_entry(sma->sem_pending.next, struct sem_queue, list); - while (&q->list != &sma->sem_pending) { error = try_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ - if (error <= 0) { - struct sem_queue *n; - - /* - * Continue scanning. The next operation - * that must be checked depends on the type of the - * completed operation: - * - if the operation modified the array, then - * restart from the head of the queue and - * check for threads that might be waiting - * for semaphore values to become 0. - * - if the operation didn't modify the array, - * then just continue. - * The order of list_del() and reading ->next - * is crucial: In the former case, the list_del() - * must be done first [because we might be the - * first entry in ->sem_pending], in the latter - * case the list_del() must be done last - * [because the list is invalid after the list_del()] - */ - if (q->alter) { - list_del(&q->list); - n = list_entry(sma->sem_pending.next, - struct sem_queue, list); - } else { - n = list_entry(q->list.next, struct sem_queue, - list); - list_del(&q->list); - } - - /* wake up the waiting thread */ - q->status = IN_WAKEUP; + if (error > 0) + continue; - wake_up_process(q->sleeper); - /* hands-off: q will disappear immediately after - * writing q->status. - */ - smp_wmb(); - q->status = error; - q = n; - } else { - q = list_entry(q->list.next, struct sem_queue, list); - } + unlink_queue(sma, q); + + /* + * The next operation that must be checked depends on the type + * of the completed operation: + * - if the operation modified the array, then restart from the + * head of the queue and check for threads that might be + * waiting for the new semaphore values. + * - if the operation didn't modify the array, then just + * continue. + */ + alter = q->alter; + wake_up_sem_queue(q, error); + if (alter && !error) + goto again; } } @@ -533,12 +586,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) /* Wake up all pending processes and let them fail with EIDRM. */ list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { - list_del(&q->list); - - q->status = IN_WAKEUP; - wake_up_process(q->sleeper); /* doesn't sleep */ - smp_wmb(); - q->status = -EIDRM; /* hands-off q */ + unlink_queue(sma, q); + wake_up_sem_queue(q, -EIDRM); } /* Remove the semaphore set from the IDR */ @@ -575,7 +624,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, static int semctl_nolock(struct ipc_namespace *ns, int semid, int cmd, int version, union semun arg) { - int err = -EINVAL; + int err; struct sem_array *sma; switch(cmd) { @@ -652,7 +701,6 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, default: return -EINVAL; } - return err; out_unlock: sem_unlock(sma); return err; @@ -759,7 +807,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, } sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - update_queue(sma); + update_queue(sma, -1); err = 0; goto out_unlock; } @@ -801,7 +849,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, curr->sempid = task_tgid_vnr(current); sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - update_queue(sma); + update_queue(sma, semnum); err = 0; goto out_unlock; } @@ -961,17 +1009,31 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) return 0; } -static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) +static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) { - struct sem_undo *walk; + struct sem_undo *un; - list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) { - if (walk->semid == semid) - return walk; + list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { + if (un->semid == semid) + return un; } return NULL; } +static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) +{ + struct sem_undo *un; + + assert_spin_locked(&ulp->lock); + + un = __lookup_undo(ulp, semid); + if (un) { + list_del_rcu(&un->list_proc); + list_add_rcu(&un->list_proc, &ulp->list_proc); + } + return un; +} + /** * find_alloc_undo - Lookup (and if not present create) undo array * @ns: namespace @@ -1163,7 +1225,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); if (error <= 0) { if (alter && error == 0) - update_queue (sma); + update_queue(sma, (nsops == 1) ? sops[0].sem_num : -1); + goto out_unlock_free; } @@ -1181,6 +1244,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, else list_add(&queue.list, &sma->sem_pending); + if (nsops == 1) { + struct sem *curr; + curr = &sma->sem_base[sops->sem_num]; + + if (alter) + list_add_tail(&queue.simple_list, &curr->sem_pending); + else + list_add(&queue.simple_list, &curr->sem_pending); + } else { + INIT_LIST_HEAD(&queue.simple_list); + sma->complex_count++; + } + queue.status = -EINTR; queue.sleeper = current; current->state = TASK_INTERRUPTIBLE; @@ -1222,7 +1298,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, */ if (timeout && jiffies_left == 0) error = -EAGAIN; - list_del(&queue.list); + unlink_queue(sma, &queue); out_unlock_free: sem_unlock(sma); @@ -1307,7 +1383,7 @@ void exit_sem(struct task_struct *tsk) if (IS_ERR(sma)) continue; - un = lookup_undo(ulp, semid); + un = __lookup_undo(ulp, semid); if (un == NULL) { /* exit_sem raced with IPC_RMID+semget() that created * exactly the same semid. Nothing to do. @@ -1351,7 +1427,7 @@ void exit_sem(struct task_struct *tsk) } sma->sem_otime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - update_queue(sma); + update_queue(sma, -1); sem_unlock(sma); call_rcu(&un->rcu, free_un); @@ -1365,7 +1441,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) struct sem_array *sma = it; return seq_printf(s, - "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", + "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", sma->sem_perm.key, sma->sem_perm.id, sma->sem_perm.mode, diff --git a/ipc/shm.c b/ipc/shm.c index 464694e0aa4a..1a314c89f93c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -39,7 +39,6 @@ #include <linux/nsproxy.h> #include <linux/mount.h> #include <linux/ipc_namespace.h> -#include <linux/ima.h> #include <asm/uaccess.h> @@ -101,6 +100,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) void shm_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &shm_ids(ns), do_shm_rmid); + idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); } #endif @@ -290,28 +290,31 @@ static unsigned long shm_get_unmapped_area(struct file *file, unsigned long flags) { struct shm_file_data *sfd = shm_file_data(file); - return get_unmapped_area(sfd->file, addr, len, pgoff, flags); -} - -int is_file_shm_hugepages(struct file *file) -{ - int ret = 0; - - if (file->f_op == &shm_file_operations) { - struct shm_file_data *sfd; - sfd = shm_file_data(file); - ret = is_file_hugepages(sfd->file); - } - return ret; + return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, + pgoff, flags); } static const struct file_operations shm_file_operations = { .mmap = shm_mmap, .fsync = shm_fsync, .release = shm_release, +#ifndef CONFIG_MMU + .get_unmapped_area = shm_get_unmapped_area, +#endif +}; + +static const struct file_operations shm_file_operations_huge = { + .mmap = shm_mmap, + .fsync = shm_fsync, + .release = shm_release, .get_unmapped_area = shm_get_unmapped_area, }; +int is_file_shm_hugepages(struct file *file) +{ + return file->f_op == &shm_file_operations_huge; +} + static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ @@ -761,8 +764,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) if (euid != shp->shm_perm.uid && euid != shp->shm_perm.cuid) goto out_unlock; - if (cmd == SHM_LOCK && - !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) goto out_unlock; } @@ -878,8 +880,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (err) goto out_unlock; - path.dentry = dget(shp->shm_file->f_path.dentry); - path.mnt = shp->shm_file->f_path.mnt; + path = shp->shm_file->f_path; + path_get(&path); shp->shm_nattch++; size = i_size_read(path.dentry->d_inode); shm_unlock(shp); @@ -889,10 +891,12 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (!sfd) goto out_put_dentry; - file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); + file = alloc_file(&path, f_mode, + is_file_hugepages(shp->shm_file) ? + &shm_file_operations_huge : + &shm_file_operations); if (!file) goto out_free; - ima_counts_get(file); file->private_data = sfd; file->f_mapping = shp->shm_file->f_mapping; @@ -947,7 +951,7 @@ out_unlock: out_free: kfree(sfd); out_put_dentry: - dput(path.dentry); + path_put(&path); goto out_nattch; } diff --git a/ipc/syscall.c b/ipc/syscall.c new file mode 100644 index 000000000000..355a3da9ec73 --- /dev/null +++ b/ipc/syscall.c @@ -0,0 +1,99 @@ +/* + * sys_ipc() is the old de-multiplexer for the SysV IPC calls. + * + * This is really horribly ugly, and new architectures should just wire up + * the individual syscalls instead. + */ +#include <linux/unistd.h> + +#ifdef __ARCH_WANT_SYS_IPC +#include <linux/errno.h> +#include <linux/ipc.h> +#include <linux/shm.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> + +SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, int, second, + unsigned long, third, void __user *, ptr, long, fifth) +{ + int version, ret; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + return sys_semtimedop(first, (struct sembuf __user *)ptr, + second, NULL); + case SEMTIMEDOP: + return sys_semtimedop(first, (struct sembuf __user *)ptr, + second, + (const struct timespec __user *)fifth); + + case SEMGET: + return sys_semget(first, second, third); + case SEMCTL: { + union semun fourth; + if (!ptr) + return -EINVAL; + if (get_user(fourth.__pad, (void __user * __user *) ptr)) + return -EFAULT; + return sys_semctl(first, second, third, fourth); + } + + case MSGSND: + return sys_msgsnd(first, (struct msgbuf __user *) ptr, + second, third); + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + if (!ptr) + return -EINVAL; + + if (copy_from_user(&tmp, + (struct ipc_kludge __user *) ptr, + sizeof(tmp))) + return -EFAULT; + return sys_msgrcv(first, tmp.msgp, second, + tmp.msgtyp, third); + } + default: + return sys_msgrcv(first, + (struct msgbuf __user *) ptr, + second, fifth, third); + } + case MSGGET: + return sys_msgget((key_t) first, second); + case MSGCTL: + return sys_msgctl(first, second, (struct msqid_ds __user *)ptr); + + case SHMAT: + switch (version) { + default: { + unsigned long raddr; + ret = do_shmat(first, (char __user *)ptr, + second, &raddr); + if (ret) + return ret; + return put_user(raddr, (unsigned long __user *) third); + } + case 1: + /* + * This was the entry point for kernel-originating calls + * from iBCS2 in 2.2 days. + */ + return -EINVAL; + } + case SHMDT: + return sys_shmdt((char __user *)ptr); + case SHMGET: + return sys_shmget(first, second, third); + case SHMCTL: + return sys_shmctl(first, second, + (struct shmid_ds __user *) ptr); + default: + return -ENOSYS; + } +} +#endif |