From 55a6f170a413cd8dc7a3a52e5a326e1a87579b4f Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 29 Nov 2016 16:53:23 -0500 Subject: audit: move kaudit thread start from auditd registration to kaudit init (#2) Richard made this change some time ago but Eric backed it out because the rest of the supporting code wasn't ready. In order to move the netlink multicast send to kauditd_thread we need to ensure the kauditd_thread is always running, so restore commit 6ff5e459 ("audit: move kaudit thread start from auditd registration to kaudit init"). Signed-off-by: Richard Guy Briggs [PM: brought forward and merged based on Richard's old patch] Signed-off-by: Paul Moore --- kernel/audit.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index a8a91bd2b2a9..d4c78ba5c4f9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -832,16 +832,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err) return err; - /* As soon as there's any sign of userspace auditd, - * start kauditd to talk to it */ - if (!kauditd_task) { - kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); - if (IS_ERR(kauditd_task)) { - err = PTR_ERR(kauditd_task); - kauditd_task = NULL; - return err; - } - } seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); @@ -1190,6 +1180,10 @@ static int __init audit_init(void) audit_default ? "enabled" : "disabled"); register_pernet_subsys(&audit_net_ops); + kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); + if (IS_ERR(kauditd_task)) + return PTR_ERR(kauditd_task); + skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); audit_initialized = AUDIT_INITIALIZED; -- cgit v1.2.3-55-g7522 From 6c9255645350ce2aecb7c3cd032d0e93d4a7a71a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:23 -0500 Subject: audit: fixup audit_init() Make sure everything is initialized before we start the kauditd_thread and don't emit the "initialized" record until everything is finished. We also panic with a descriptive message if we can't start the kauditd_thread. Signed-off-by: Paul Moore --- kernel/audit.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index d4c78ba5c4f9..b61642b1934f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1180,21 +1180,23 @@ static int __init audit_init(void) audit_default ? "enabled" : "disabled"); register_pernet_subsys(&audit_net_ops); - kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); - if (IS_ERR(kauditd_task)) - return PTR_ERR(kauditd_task); - skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; - audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); - for (i = 0; i < AUDIT_INODE_BUCKETS; i++) INIT_LIST_HEAD(&audit_inode_hash[i]); + kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); + if (IS_ERR(kauditd_task)) { + int err = PTR_ERR(kauditd_task); + panic("audit: failed to start the kauditd thread (%d)\n", err); + } + + audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); + return 0; } __initcall(audit_init); -- cgit v1.2.3-55-g7522 From 4aa83872d346806d9b54768aa0d1329050542bad Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:24 -0500 Subject: audit: queue netlink multicast sends just like we do for unicast sends Sending audit netlink multicast messages is bad for all the same reasons that sending audit netlink unicast messages is bad, so this patch reworks things so that we don't do the multicast send in audit_log_end(), we do it from the dedicated kauditd_thread thread just as we do for unicast messages. See the GitHub issues below for more information/history: * https://github.com/linux-audit/audit-kernel/issues/23 * https://github.com/linux-audit/audit-kernel/issues/22 Signed-off-by: Paul Moore --- kernel/audit.c | 70 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index b61642b1934f..801247a6c9e5 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -511,26 +511,46 @@ static void flush_hold_queue(void) static int kauditd_thread(void *dummy) { + struct sk_buff *skb; + struct nlmsghdr *nlh; + set_freezable(); while (!kthread_should_stop()) { - struct sk_buff *skb; - flush_hold_queue(); skb = skb_dequeue(&audit_skb_queue); - if (skb) { - if (!audit_backlog_limit || - (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit)) - wake_up(&audit_backlog_wait); + nlh = nlmsg_hdr(skb); + + /* if nlh->nlmsg_len is zero then we haven't attempted + * to send the message to userspace yet, if nlmsg_len + * is non-zero then we have attempted to send it to + * the multicast listeners as well as auditd; keep + * trying to send to auditd but don't repeat the + * multicast send */ + if (nlh->nlmsg_len == 0) { + nlh->nlmsg_len = skb->len; + kauditd_send_multicast_skb(skb, GFP_KERNEL); + + /* see the note in kauditd_send_multicast_skb + * regarding the nlh->nlmsg_len value and why + * it differs between the multicast and unicast + * clients */ + nlh->nlmsg_len -= NLMSG_HDRLEN; + } + if (audit_pid) kauditd_send_skb(skb); else audit_printk_skb(skb); - continue; + } else { + /* we have flushed the backlog so wake everyone up who + * is blocked and go to sleep until we have something + * in the backlog again */ + wake_up(&audit_backlog_wait); + wait_event_freezable(kauditd_wait, + skb_queue_len(&audit_skb_queue)); } - - wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue)); } return 0; } @@ -1969,10 +1989,10 @@ out: * audit_log_end - end one audit record * @ab: the audit_buffer * - * netlink_unicast() cannot be called inside an irq context because it blocks - * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed - * on a queue and a tasklet is scheduled to remove them from the queue outside - * the irq context. May be called in any context. + * We can not do a netlink send inside an irq context because it blocks (last + * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a + * queue and a tasklet is scheduled to remove them from the queue outside the + * irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) { @@ -1981,28 +2001,8 @@ void audit_log_end(struct audit_buffer *ab) if (!audit_rate_check()) { audit_log_lost("rate limit exceeded"); } else { - struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); - - nlh->nlmsg_len = ab->skb->len; - kauditd_send_multicast_skb(ab->skb, ab->gfp_mask); - - /* - * The original kaudit unicast socket sends up messages with - * nlmsg_len set to the payload length rather than the entire - * message length. This breaks the standard set by netlink. - * The existing auditd daemon assumes this breakage. Fixing - * this would require co-ordinating a change in the established - * protocol between the kaudit kernel subsystem and the auditd - * userspace code. - */ - nlh->nlmsg_len -= NLMSG_HDRLEN; - - if (audit_pid) { - skb_queue_tail(&audit_skb_queue, ab->skb); - wake_up_interruptible(&kauditd_wait); - } else { - audit_printk_skb(ab->skb); - } + skb_queue_tail(&audit_skb_queue, ab->skb); + wake_up_interruptible(&kauditd_wait); ab->skb = NULL; } audit_buffer_free(ab); -- cgit v1.2.3-55-g7522 From af8b824f283de5acc9b9ae8dbb60e4adacff721b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:24 -0500 Subject: audit: rename the queues and kauditd related functions The audit queue names can be shortened and the record sending helpers associated with the kauditd task could be named better, do these small cleanups now to make life easier once we start reworking the queues and kauditd code. Signed-off-by: Paul Moore --- kernel/audit.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index 801247a6c9e5..6ac1df116c0b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -138,9 +138,9 @@ static DEFINE_SPINLOCK(audit_freelist_lock); static int audit_freelist_count; static LIST_HEAD(audit_freelist); -static struct sk_buff_head audit_skb_queue; +static struct sk_buff_head audit_queue; /* queue of skbs to send to auditd when/if it comes back */ -static struct sk_buff_head audit_skb_hold_queue; +static struct sk_buff_head audit_hold_queue; static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); @@ -377,8 +377,8 @@ static void audit_hold_skb(struct sk_buff *skb) { if (audit_default && (!audit_backlog_limit || - skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit)) - skb_queue_tail(&audit_skb_hold_queue, skb); + skb_queue_len(&audit_hold_queue) < audit_backlog_limit)) + skb_queue_tail(&audit_hold_queue, skb); else kfree_skb(skb); } @@ -387,7 +387,7 @@ static void audit_hold_skb(struct sk_buff *skb) * For one reason or another this nlh isn't getting delivered to the userspace * audit daemon, just send it to printk. */ -static void audit_printk_skb(struct sk_buff *skb) +static void kauditd_printk_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); char *data = nlmsg_data(nlh); @@ -402,7 +402,7 @@ static void audit_printk_skb(struct sk_buff *skb) audit_hold_skb(skb); } -static void kauditd_send_skb(struct sk_buff *skb) +static void kauditd_send_unicast_skb(struct sk_buff *skb) { int err; int attempts = 0; @@ -493,13 +493,13 @@ static void flush_hold_queue(void) if (!audit_default || !audit_pid) return; - skb = skb_dequeue(&audit_skb_hold_queue); + skb = skb_dequeue(&audit_hold_queue); if (likely(!skb)) return; while (skb && audit_pid) { - kauditd_send_skb(skb); - skb = skb_dequeue(&audit_skb_hold_queue); + kauditd_send_unicast_skb(skb); + skb = skb_dequeue(&audit_hold_queue); } /* @@ -518,7 +518,7 @@ static int kauditd_thread(void *dummy) while (!kthread_should_stop()) { flush_hold_queue(); - skb = skb_dequeue(&audit_skb_queue); + skb = skb_dequeue(&audit_queue); if (skb) { nlh = nlmsg_hdr(skb); @@ -540,16 +540,16 @@ static int kauditd_thread(void *dummy) } if (audit_pid) - kauditd_send_skb(skb); + kauditd_send_unicast_skb(skb); else - audit_printk_skb(skb); + kauditd_printk_skb(skb); } else { /* we have flushed the backlog so wake everyone up who * is blocked and go to sleep until we have something * in the backlog again */ wake_up(&audit_backlog_wait); wait_event_freezable(kauditd_wait, - skb_queue_len(&audit_skb_queue)); + skb_queue_len(&audit_queue)); } } return 0; @@ -865,7 +865,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); - s.backlog = skb_queue_len(&audit_skb_queue); + s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; s.backlog_wait_time = audit_backlog_wait_time_master; audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); @@ -1200,8 +1200,8 @@ static int __init audit_init(void) audit_default ? "enabled" : "disabled"); register_pernet_subsys(&audit_net_ops); - skb_queue_head_init(&audit_skb_queue); - skb_queue_head_init(&audit_skb_hold_queue); + skb_queue_head_init(&audit_queue); + skb_queue_head_init(&audit_hold_queue); audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; @@ -1357,7 +1357,7 @@ static long wait_for_auditd(long sleep_time) DECLARE_WAITQUEUE(wait, current); if (audit_backlog_limit && - skb_queue_len(&audit_skb_queue) > audit_backlog_limit) { + skb_queue_len(&audit_queue) > audit_backlog_limit) { add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); sleep_time = schedule_timeout(sleep_time); @@ -1406,7 +1406,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } while (audit_backlog_limit - && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { + && skb_queue_len(&audit_queue) > audit_backlog_limit + reserve) { if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) { long sleep_time; @@ -1419,7 +1419,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } if (audit_rate_check() && printk_ratelimit()) pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", - skb_queue_len(&audit_skb_queue), + skb_queue_len(&audit_queue), audit_backlog_limit); audit_log_lost("backlog limit exceeded"); audit_backlog_wait_time = 0; @@ -2001,7 +2001,7 @@ void audit_log_end(struct audit_buffer *ab) if (!audit_rate_check()) { audit_log_lost("rate limit exceeded"); } else { - skb_queue_tail(&audit_skb_queue, ab->skb); + skb_queue_tail(&audit_queue, ab->skb); wake_up_interruptible(&kauditd_wait); ab->skb = NULL; } -- cgit v1.2.3-55-g7522 From c6480207fdf7b61de216ee23e93eac0a6878fa74 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:25 -0500 Subject: audit: rework the audit queue handling The audit record backlog queue has always been a bit of a mess, and the moving the multicast send into kauditd_thread() from audit_log_end() only makes things worse. This patch attempts to fix the backlog queue with a better design that should hold up better under load and have less of a performance impact at syscall invocation time. While it looks like there is a log going on in this patch, the main change is the move from a single backlog queue to three queues: * A queue for holding records generated from audit_log_end() that haven't been consumed by kauditd_thread() (audit_queue). * A queue for holding records that have been sent via multicast but had a temporary failure when sending via unicast and need a resend (audit_retry_queue). * A queue for holding records that haven't been sent via unicast because no one is listening (audit_hold_queue). Special care is taken in this patch to ensure that the proper record ordering is preserved, e.g. we send everything in the hold queue first, then the retry queue, and finally the main queue. Signed-off-by: Paul Moore --- kernel/audit.c | 347 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 226 insertions(+), 121 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index 6ac1df116c0b..f4056bc331fc 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -138,11 +138,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock); static int audit_freelist_count; static LIST_HEAD(audit_freelist); +/* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; -/* queue of skbs to send to auditd when/if it comes back */ +/* queue msgs due to temporary unicast send problems */ +static struct sk_buff_head audit_retry_queue; +/* queue msgs waiting for new auditd connection */ static struct sk_buff_head audit_hold_queue; + +/* queue servicing thread */ static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); + +/* waitqueue for callers who are blocked on the audit backlog */ static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, @@ -364,25 +371,6 @@ static int audit_set_failure(u32 state) return audit_do_config_change("audit_failure", &audit_failure, state); } -/* - * Queue skbs to be sent to auditd when/if it comes back. These skbs should - * already have been sent via prink/syslog and so if these messages are dropped - * it is not a huge concern since we already passed the audit_log_lost() - * notification and stuff. This is just nice to get audit messages during - * boot before auditd is running or messages generated while auditd is stopped. - * This only holds messages is audit_default is set, aka booting with audit=1 - * or building your kernel that way. - */ -static void audit_hold_skb(struct sk_buff *skb) -{ - if (audit_default && - (!audit_backlog_limit || - skb_queue_len(&audit_hold_queue) < audit_backlog_limit)) - skb_queue_tail(&audit_hold_queue, skb); - else - kfree_skb(skb); -} - /* * For one reason or another this nlh isn't getting delivered to the userspace * audit daemon, just send it to printk. @@ -398,58 +386,114 @@ static void kauditd_printk_skb(struct sk_buff *skb) else audit_log_lost("printk limit exceeded"); } +} + +/** + * kauditd_hold_skb - Queue an audit record, waiting for auditd + * @skb: audit record + * + * Description: + * Queue the audit record, waiting for an instance of auditd. When this + * function is called we haven't given up yet on sending the record, but things + * are not looking good. The first thing we want to do is try to write the + * record via printk and then see if we want to try and hold on to the record + * and queue it, if we have room. If we want to hold on to the record, but we + * don't have room, record a record lost message. + */ +static void kauditd_hold_skb(struct sk_buff *skb) +{ + /* at this point it is uncertain if we will ever send this to auditd so + * try to send the message via printk before we go any further */ + kauditd_printk_skb(skb); + + /* can we just silently drop the message? */ + if (!audit_default) { + kfree_skb(skb); + return; + } + + /* if we have room, queue the message */ + if (!audit_backlog_limit || + skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_hold_queue, skb); + return; + } - audit_hold_skb(skb); + /* we have no other options - drop the message */ + audit_log_lost("kauditd hold queue overflow"); + kfree_skb(skb); } -static void kauditd_send_unicast_skb(struct sk_buff *skb) +/** + * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd + * @skb: audit record + * + * Description: + * Not as serious as kauditd_hold_skb() as we still have a connected auditd, + * but for some reason we are having problems sending it audit records so + * queue the given record and attempt to resend. + */ +static void kauditd_retry_skb(struct sk_buff *skb) { - int err; - int attempts = 0; -#define AUDITD_RETRIES 5 + /* NOTE: because records should only live in the retry queue for a + * short period of time, before either being sent or moved to the hold + * queue, we don't currently enforce a limit on this queue */ + skb_queue_tail(&audit_retry_queue, skb); +} + +/** + * auditd_reset - Disconnect the auditd connection + * + * Description: + * Break the auditd/kauditd connection and move all the records in the retry + * queue into the hold queue in case auditd reconnects. + */ +static void auditd_reset(void) +{ + struct sk_buff *skb; + + /* break the connection */ + audit_pid = 0; + audit_sock = NULL; + + /* flush all of the retry queue to the hold queue */ + while ((skb = skb_dequeue(&audit_retry_queue))) + kauditd_hold_skb(skb); +} + +/** + * kauditd_send_unicast_skb - Send a record via unicast to auditd + * @skb: audit record + */ +static int kauditd_send_unicast_skb(struct sk_buff *skb) +{ + int rc; -restart: - /* take a reference in case we can't send it and we want to hold it */ + /* get an extra skb reference in case we fail to send */ skb_get(skb); - err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); - if (err < 0) { - pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n", - audit_pid, err); - if (audit_pid) { - if (err == -ECONNREFUSED || err == -EPERM - || ++attempts >= AUDITD_RETRIES) { - char s[32]; - - snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid); - audit_log_lost(s); - audit_pid = 0; - audit_sock = NULL; - } else { - pr_warn("re-scheduling(#%d) write to audit_pid=%d\n", - attempts, audit_pid); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - goto restart; - } - } - /* we might get lucky and get this in the next auditd */ - audit_hold_skb(skb); - } else - /* drop the extra reference if sent ok */ + rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); + if (rc >= 0) { consume_skb(skb); + rc = 0; + } + + return rc; } /* - * kauditd_send_multicast_skb - send the skb to multicast userspace listeners + * kauditd_send_multicast_skb - Send a record to any multicast listeners + * @skb: audit record * + * Description: * This function doesn't consume an skb as might be expected since it has to * copy it anyways. */ -static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) +static void kauditd_send_multicast_skb(struct sk_buff *skb) { - struct sk_buff *copy; - struct audit_net *aunet = net_generic(&init_net, audit_net_id); - struct sock *sock = aunet->nlsk; + struct sk_buff *copy; + struct audit_net *aunet = net_generic(&init_net, audit_net_id); + struct sock *sock = aunet->nlsk; + struct nlmsghdr *nlh; if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) return; @@ -464,94 +508,155 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) * no reason for new multicast clients to continue with this * non-compliance. */ - copy = skb_copy(skb, gfp_mask); + copy = skb_copy(skb, GFP_KERNEL); if (!copy) return; + nlh = nlmsg_hdr(copy); + nlh->nlmsg_len = skb->len; - nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask); + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); } -/* - * flush_hold_queue - empty the hold queue if auditd appears - * - * If auditd just started, drain the queue of messages already - * sent to syslog/printk. Remember loss here is ok. We already - * called audit_log_lost() if it didn't go out normally. so the - * race between the skb_dequeue and the next check for audit_pid - * doesn't matter. +/** + * kauditd_wake_condition - Return true when it is time to wake kauditd_thread * - * If you ever find kauditd to be too slow we can get a perf win - * by doing our own locking and keeping better track if there - * are messages in this queue. I don't see the need now, but - * in 5 years when I want to play with this again I'll see this - * note and still have no friggin idea what i'm thinking today. + * Description: + * This function is for use by the wait_event_freezable() call in + * kauditd_thread(). */ -static void flush_hold_queue(void) +static int kauditd_wake_condition(void) { - struct sk_buff *skb; - - if (!audit_default || !audit_pid) - return; - - skb = skb_dequeue(&audit_hold_queue); - if (likely(!skb)) - return; + static int pid_last = 0; + int rc; + int pid = audit_pid; - while (skb && audit_pid) { - kauditd_send_unicast_skb(skb); - skb = skb_dequeue(&audit_hold_queue); - } + /* wake on new messages or a change in the connected auditd */ + rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last); + if (rc) + pid_last = pid; - /* - * if auditd just disappeared but we - * dequeued an skb we need to drop ref - */ - consume_skb(skb); + return rc; } static int kauditd_thread(void *dummy) { + int rc; + int auditd = 0; + int reschedule = 0; struct sk_buff *skb; struct nlmsghdr *nlh; +#define UNICAST_RETRIES 5 +#define AUDITD_BAD(x,y) \ + ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES) + + /* NOTE: we do invalidate the auditd connection flag on any sending + * errors, but we only "restore" the connection flag at specific places + * in the loop in order to help ensure proper ordering of audit + * records */ + set_freezable(); while (!kthread_should_stop()) { - flush_hold_queue(); + /* NOTE: possible area for future improvement is to look at + * the hold and retry queues, since only this thread + * has access to these queues we might be able to do + * our own queuing and skip some/all of the locking */ + + /* NOTE: it might be a fun experiment to split the hold and + * retry queue handling to another thread, but the + * synchronization issues and other overhead might kill + * any performance gains */ + + /* attempt to flush the hold queue */ + while (auditd && (skb = skb_dequeue(&audit_hold_queue))) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + /* requeue to the same spot */ + skb_queue_head(&audit_hold_queue, skb); + + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + auditd_reset(); + reschedule = 0; + } + } else + /* we were able to send successfully */ + reschedule = 0; + } + + /* attempt to flush the retry queue */ + while (auditd && (skb = skb_dequeue(&audit_retry_queue))) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + kauditd_hold_skb(skb); + auditd_reset(); + reschedule = 0; + } else + /* temporary problem (we hope), queue + * to the same spot and retry */ + skb_queue_head(&audit_retry_queue, skb); + } else + /* we were able to send successfully */ + reschedule = 0; + } + /* standard queue processing, try to be as quick as possible */ +quick_loop: skb = skb_dequeue(&audit_queue); if (skb) { + /* setup the netlink header, see the comments in + * kauditd_send_multicast_skb() for length quirks */ nlh = nlmsg_hdr(skb); - - /* if nlh->nlmsg_len is zero then we haven't attempted - * to send the message to userspace yet, if nlmsg_len - * is non-zero then we have attempted to send it to - * the multicast listeners as well as auditd; keep - * trying to send to auditd but don't repeat the - * multicast send */ - if (nlh->nlmsg_len == 0) { - nlh->nlmsg_len = skb->len; - kauditd_send_multicast_skb(skb, GFP_KERNEL); - - /* see the note in kauditd_send_multicast_skb - * regarding the nlh->nlmsg_len value and why - * it differs between the multicast and unicast - * clients */ - nlh->nlmsg_len -= NLMSG_HDRLEN; - } - - if (audit_pid) - kauditd_send_unicast_skb(skb); + nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; + + /* attempt to send to any multicast listeners */ + kauditd_send_multicast_skb(skb); + + /* attempt to send to auditd, queue on failure */ + if (auditd) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + auditd_reset(); + reschedule = 0; + } + + /* move to the retry queue */ + kauditd_retry_skb(skb); + } else + /* everything is working so go fast! */ + goto quick_loop; + } else if (reschedule) + /* we are currently having problems, move to + * the retry queue */ + kauditd_retry_skb(skb); else - kauditd_printk_skb(skb); + /* dump the message via printk and hold it */ + kauditd_hold_skb(skb); } else { - /* we have flushed the backlog so wake everyone up who - * is blocked and go to sleep until we have something - * in the backlog again */ + /* we have flushed the backlog so wake everyone */ wake_up(&audit_backlog_wait); - wait_event_freezable(kauditd_wait, - skb_queue_len(&audit_queue)); + + /* if everything is okay with auditd (if present), go + * to sleep until there is something new in the queue + * or we have a change in the connected auditd; + * otherwise simply reschedule to give things a chance + * to recover */ + if (reschedule) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } else + wait_event_freezable(kauditd_wait, + kauditd_wake_condition()); + + /* update the auditd connection status */ + auditd = (audit_pid ? 1 : 0); } } + return 0; } @@ -616,6 +721,7 @@ static int audit_send_reply_thread(void *arg) kfree(reply); return 0; } + /** * audit_send_reply - send an audit reply message via netlink * @request_skb: skb of request we are replying to (used to target the reply) @@ -1171,10 +1277,8 @@ static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); struct sock *sock = aunet->nlsk; - if (sock == audit_sock) { - audit_pid = 0; - audit_sock = NULL; - } + if (sock == audit_sock) + auditd_reset(); RCU_INIT_POINTER(aunet->nlsk, NULL); synchronize_net(); @@ -1201,6 +1305,7 @@ static int __init audit_init(void) register_pernet_subsys(&audit_net_ops); skb_queue_head_init(&audit_queue); + skb_queue_head_init(&audit_retry_queue); skb_queue_head_init(&audit_hold_queue); audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; -- cgit v1.2.3-55-g7522 From 3197542482df22c2a131d4a813280bd7c54cedf5 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:25 -0500 Subject: audit: rework audit_log_start() The backlog queue handling in audit_log_start() is a little odd with some questionable design decisions, this patch attempts to rectify this with the following changes: * Never make auditd wait, ignore any backlog limits as we need auditd awake so it can drain the backlog queue. * When we hit a backlog limit and start dropping records, don't wake all the tasks sleeping on the backlog, that's silly. Instead, let kauditd_thread() take care of waking everyone once it has had a chance to drain the backlog queue. * Don't keep a global backlog timeout countdown, make it per-task. A per-task timer means we won't have all the sleeping tasks waking at the same time and hammering on an already stressed backlog queue. Signed-off-by: Paul Moore --- kernel/audit.c | 92 +++++++++++++++++++++++----------------------------------- 1 file changed, 36 insertions(+), 56 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index f4056bc331fc..e23ce6ce101f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -107,7 +107,6 @@ static u32 audit_rate_limit; * When set to zero, this means unlimited. */ static u32 audit_backlog_limit = 64; #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) -static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME; static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; /* The identity of the user shutting down the audit system. */ @@ -345,7 +344,7 @@ static int audit_set_backlog_limit(u32 limit) static int audit_set_backlog_wait_time(u32 timeout) { return audit_do_config_change("audit_backlog_wait_time", - &audit_backlog_wait_time_master, timeout); + &audit_backlog_wait_time, timeout); } static int audit_set_enabled(u32 state) @@ -973,7 +972,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.lost = atomic_read(&audit_lost); s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; - s.backlog_wait_time = audit_backlog_wait_time_master; + s.backlog_wait_time = audit_backlog_wait_time; audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } @@ -1454,24 +1453,6 @@ static inline void audit_get_stamp(struct audit_context *ctx, } } -/* - * Wait for auditd to drain the queue a little - */ -static long wait_for_auditd(long sleep_time) -{ - DECLARE_WAITQUEUE(wait, current); - - if (audit_backlog_limit && - skb_queue_len(&audit_queue) > audit_backlog_limit) { - add_wait_queue_exclusive(&audit_backlog_wait, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - sleep_time = schedule_timeout(sleep_time); - remove_wait_queue(&audit_backlog_wait, &wait); - } - - return sleep_time; -} - /** * audit_log_start - obtain an audit buffer * @ctx: audit_context (may be NULL) @@ -1490,12 +1471,9 @@ static long wait_for_auditd(long sleep_time) struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { - struct audit_buffer *ab = NULL; - struct timespec t; - unsigned int uninitialized_var(serial); - int reserve = 5; /* Allow atomic callers to go up to five - entries over the normal backlog limit */ - unsigned long timeout_start = jiffies; + struct audit_buffer *ab; + struct timespec t; + unsigned int uninitialized_var(serial); if (audit_initialized != AUDIT_INITIALIZED) return NULL; @@ -1503,38 +1481,40 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) return NULL; - if (gfp_mask & __GFP_DIRECT_RECLAIM) { - if (audit_pid && audit_pid == current->tgid) - gfp_mask &= ~__GFP_DIRECT_RECLAIM; - else - reserve = 0; - } - - while (audit_backlog_limit - && skb_queue_len(&audit_queue) > audit_backlog_limit + reserve) { - if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) { - long sleep_time; - - sleep_time = timeout_start + audit_backlog_wait_time - jiffies; - if (sleep_time > 0) { - sleep_time = wait_for_auditd(sleep_time); - if (sleep_time > 0) - continue; + /* don't ever fail/sleep on auditd since we need auditd to drain the + * queue; also, when we are checking for auditd, compare PIDs using + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using + * a PID anchored in the caller's namespace */ + if (!(audit_pid && audit_pid == task_tgid_vnr(current))) { + long sleep_time = audit_backlog_wait_time; + + while (audit_backlog_limit && + (skb_queue_len(&audit_queue) > audit_backlog_limit)) { + /* wake kauditd to try and flush the queue */ + wake_up_interruptible(&kauditd_wait); + + /* sleep if we are allowed and we haven't exhausted our + * backlog wait limit */ + if ((gfp_mask & __GFP_DIRECT_RECLAIM) && + (sleep_time > 0)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&audit_backlog_wait, + &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + sleep_time = schedule_timeout(sleep_time); + remove_wait_queue(&audit_backlog_wait, &wait); + } else { + if (audit_rate_check() && printk_ratelimit()) + pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", + skb_queue_len(&audit_queue), + audit_backlog_limit); + audit_log_lost("backlog limit exceeded"); + return NULL; } } - if (audit_rate_check() && printk_ratelimit()) - pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", - skb_queue_len(&audit_queue), - audit_backlog_limit); - audit_log_lost("backlog limit exceeded"); - audit_backlog_wait_time = 0; - wake_up(&audit_backlog_wait); - return NULL; } - if (!reserve && !audit_backlog_wait_time) - audit_backlog_wait_time = audit_backlog_wait_time_master; - ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { audit_log_lost("out of memory in audit_log_start"); @@ -1542,9 +1522,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } audit_get_stamp(ab->ctx, &t, &serial); - audit_log_format(ab, "audit(%lu.%03lu:%u): ", t.tv_sec, t.tv_nsec/1000000, serial); + return ab; } -- cgit v1.2.3-55-g7522 From e1d166212894d9d959a601c4802882b877bb420a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:25 -0500 Subject: audit: wake up kauditd_thread after auditd registers This patch was suggested by Richard Briggs back in 2015, see the link to the mail archive below. Unfortunately, that patch is no longer even remotely valid due to other changes to the code. * https://www.redhat.com/archives/linux-audit/2015-October/msg00075.html Suggested-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index e23ce6ce101f..0572e5dcfda7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1009,6 +1009,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_pid = new_pid; audit_nlk_portid = NETLINK_CB(skb).portid; audit_sock = skb->sk; + wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); -- cgit v1.2.3-55-g7522 From 6c54e7899693dee3db67ea996e9be0e10f67920f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:26 -0500 Subject: audit: handle a clean auditd shutdown with grace When auditd stops cleanly it sets 'auditd_pid' to 0 with an AUDIT_SET message, in this case we should reset our backlog queues via the auditd_reset() function. This patch also adds a 'auditd_pid' check to the top of kauditd_send_unicast_skb() so we can fail quicker. Signed-off-by: Paul Moore --- kernel/audit.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index 0572e5dcfda7..b447a6b1fdc8 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -468,6 +468,10 @@ static int kauditd_send_unicast_skb(struct sk_buff *skb) { int rc; + /* if we know nothing is connected, don't even try the netlink call */ + if (!audit_pid) + return -ECONNREFUSED; + /* get an extra skb reference in case we fail to send */ skb_get(skb); rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); @@ -1009,6 +1013,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_pid = new_pid; audit_nlk_portid = NETLINK_CB(skb).portid; audit_sock = skb->sk; + if (!new_pid) + auditd_reset(); wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { -- cgit v1.2.3-55-g7522 From a09cfa470817ac086cf68418da13a2b91c2744ef Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:26 -0500 Subject: audit: don't ever sleep on a command record/message Sleeping on a command record/message in audit_log_start() could slow something, e.g. auditd, from doing something important, e.g. clean shutdown, which could present problems on a heavily loaded system. This patch allows tasks to bypass any queue restrictions if they are logging a command record/message. Signed-off-by: Paul Moore --- kernel/audit.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index b447a6b1fdc8..f20eee0db7e6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1488,11 +1488,19 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) return NULL; - /* don't ever fail/sleep on auditd since we need auditd to drain the - * queue; also, when we are checking for auditd, compare PIDs using - * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using - * a PID anchored in the caller's namespace */ - if (!(audit_pid && audit_pid == task_tgid_vnr(current))) { + /* don't ever fail/sleep on these two conditions: + * 1. auditd generated record - since we need auditd to drain the + * queue; also, when we are checking for auditd, compare PIDs using + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() + * using a PID anchored in the caller's namespace + * 2. audit command message - record types 1000 through 1099 inclusive + * are command messages/records used to manage the kernel subsystem + * and the audit userspace, blocking on these messages could cause + * problems under load so don't do it (note: not all of these + * command types are valid as record types, but it is quicker to + * just check two ints than a series of ints in a if/switch stmt) */ + if (!((audit_pid && audit_pid == task_tgid_vnr(current)) || + (type >= 1000 && type <= 1099))) { long sleep_time = audit_backlog_wait_time; while (audit_backlog_limit && -- cgit v1.2.3-55-g7522 From 533c7b69c764ad5febb3e716899f43a75564fcab Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 13 Dec 2016 10:03:01 -0500 Subject: audit: use proper refcount locking on audit_sock Resetting audit_sock appears to be racy. audit_sock was being copied and dereferenced without using a refcount on the source sock. Bump the refcount on the underlying sock when we store a refrence in audit_sock and release it when we reset audit_sock. audit_sock modification needs the audit_cmd_mutex. See: https://lkml.org/lkml/2016/11/26/232 Thanks to Eric Dumazet and Cong Wang on ideas how to fix it. Signed-off-by: Richard Guy Briggs Reviewed-by: Cong Wang [PM: fixed the comment block text formatting for auditd_reset()] Signed-off-by: Paul Moore --- kernel/audit.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'kernel/audit.c') diff --git a/kernel/audit.c b/kernel/audit.c index f20eee0db7e6..41017685f9f2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -445,15 +445,20 @@ static void kauditd_retry_skb(struct sk_buff *skb) * * Description: * Break the auditd/kauditd connection and move all the records in the retry - * queue into the hold queue in case auditd reconnects. + * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex + * must be held when calling this function. */ static void auditd_reset(void) { struct sk_buff *skb; /* break the connection */ + if (audit_sock) { + sock_put(audit_sock); + audit_sock = NULL; + } audit_pid = 0; - audit_sock = NULL; + audit_nlk_portid = 0; /* flush all of the retry queue to the hold queue */ while ((skb = skb_dequeue(&audit_retry_queue))) @@ -579,7 +584,9 @@ static int kauditd_thread(void *dummy) auditd = 0; if (AUDITD_BAD(rc, reschedule)) { + mutex_lock(&audit_cmd_mutex); auditd_reset(); + mutex_unlock(&audit_cmd_mutex); reschedule = 0; } } else @@ -594,7 +601,9 @@ static int kauditd_thread(void *dummy) auditd = 0; if (AUDITD_BAD(rc, reschedule)) { kauditd_hold_skb(skb); + mutex_lock(&audit_cmd_mutex); auditd_reset(); + mutex_unlock(&audit_cmd_mutex); reschedule = 0; } else /* temporary problem (we hope), queue @@ -623,7 +632,9 @@ quick_loop: if (rc) { auditd = 0; if (AUDITD_BAD(rc, reschedule)) { + mutex_lock(&audit_cmd_mutex); auditd_reset(); + mutex_unlock(&audit_cmd_mutex); reschedule = 0; } @@ -1010,11 +1021,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, audit_pid, 1); - audit_pid = new_pid; - audit_nlk_portid = NETLINK_CB(skb).portid; - audit_sock = skb->sk; - if (!new_pid) + if (new_pid) { + if (audit_sock) + sock_put(audit_sock); + audit_pid = new_pid; + audit_nlk_portid = NETLINK_CB(skb).portid; + sock_hold(skb->sk); + audit_sock = skb->sk; + } else { auditd_reset(); + } wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { @@ -1283,8 +1299,10 @@ static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); struct sock *sock = aunet->nlsk; + mutex_lock(&audit_cmd_mutex); if (sock == audit_sock) auditd_reset(); + mutex_unlock(&audit_cmd_mutex); RCU_INIT_POINTER(aunet->nlsk, NULL); synchronize_net(); -- cgit v1.2.3-55-g7522