diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 128 | ||||
-rw-r--r-- | kernel/auditsc.c | 327 | ||||
-rw-r--r-- | kernel/exit.c | 6 | ||||
-rw-r--r-- | kernel/fork.c | 3 | ||||
-rw-r--r-- | kernel/module.c | 11 | ||||
-rw-r--r-- | kernel/power/Kconfig | 2 | ||||
-rw-r--r-- | kernel/power/disk.c | 6 | ||||
-rw-r--r-- | kernel/power/power.h | 2 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 12 | ||||
-rw-r--r-- | kernel/printk.c | 7 | ||||
-rw-r--r-- | kernel/sched.c | 8 | ||||
-rw-r--r-- | kernel/signal.c | 31 | ||||
-rw-r--r-- | kernel/sys.c | 55 | ||||
-rw-r--r-- | kernel/timer.c | 9 |
14 files changed, 411 insertions, 196 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 7f0699790d4..83096b67510 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -79,6 +79,8 @@ static int audit_rate_limit; /* Number of outstanding audit_buffers allowed. */ static int audit_backlog_limit = 64; +static int audit_backlog_wait_time = 60 * HZ; +static int audit_backlog_wait_overflow = 0; /* The identity of the user shutting down the audit system. */ uid_t audit_sig_uid = -1; @@ -106,18 +108,12 @@ static LIST_HEAD(audit_freelist); static struct sk_buff_head audit_skb_queue; static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); - -/* There are three lists of rules -- one to search at task creation - * time, one to search at syscall entry time, and another to search at - * syscall exit time. */ -static LIST_HEAD(audit_tsklist); -static LIST_HEAD(audit_entlist); -static LIST_HEAD(audit_extlist); +static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); /* The netlink socket is only to be read by 1 CPU, which lets us assume * that list additions and deletions never happen simultaneously in * auditsc.c */ -static DECLARE_MUTEX(audit_netlink_sem); +DECLARE_MUTEX(audit_netlink_sem); /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting * audit records. Since printk uses a 1024 byte buffer, this buffer @@ -137,6 +133,7 @@ struct audit_buffer { struct list_head list; struct sk_buff *skb; /* formatted skb ready to send */ struct audit_context *ctx; /* NULL or associated context */ + int gfp_mask; }; static void audit_set_pid(struct audit_buffer *ab, pid_t pid) @@ -145,11 +142,6 @@ static void audit_set_pid(struct audit_buffer *ab, pid_t pid) nlh->nlmsg_pid = pid; } -struct audit_entry { - struct list_head list; - struct audit_rule rule; -}; - static void audit_panic(const char *message) { switch (audit_failure) @@ -233,7 +225,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid) { int old = audit_rate_limit; audit_rate_limit = limit; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_rate_limit=%d old=%d by auid=%u", audit_rate_limit, old, loginuid); return old; @@ -243,7 +235,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid) { int old = audit_backlog_limit; audit_backlog_limit = limit; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_backlog_limit=%d old=%d by auid=%u", audit_backlog_limit, old, loginuid); return old; @@ -255,7 +247,7 @@ static int audit_set_enabled(int state, uid_t loginuid) if (state != 0 && state != 1) return -EINVAL; audit_enabled = state; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_enabled=%d old=%d by auid=%u", audit_enabled, old, loginuid); return old; @@ -269,7 +261,7 @@ static int audit_set_failure(int state, uid_t loginuid) && state != AUDIT_FAIL_PANIC) return -EINVAL; audit_failure = state; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_failure=%d old=%d by auid=%u", audit_failure, old, loginuid); return old; @@ -281,6 +273,7 @@ int kauditd_thread(void *dummy) while (1) { skb = skb_dequeue(&audit_skb_queue); + wake_up(&audit_backlog_wait); if (skb) { if (audit_pid) { int err = netlink_unicast(audit_sock, skb, audit_pid, 0); @@ -290,7 +283,7 @@ int kauditd_thread(void *dummy) audit_pid = 0; } } else { - printk(KERN_ERR "%s\n", skb->data + NLMSG_SPACE(0)); + printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0)); kfree_skb(skb); } } else { @@ -423,7 +416,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; audit_pid = status_get->pid; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_pid=%d old=%d by auid=%u", audit_pid, old, loginuid); } @@ -435,15 +428,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: - ab = audit_log_start(NULL, msg_type); - if (!ab) - break; /* audit_panic has been called */ - audit_log_format(ab, - "user pid=%d uid=%u auid=%u" - " msg='%.1024s'", - pid, uid, loginuid, (char *)data); - audit_set_pid(ab, pid); - audit_log_end(ab); + if (!audit_enabled && msg_type != AUDIT_USER_AVC) + return 0; + + err = audit_filter_user(&NETLINK_CB(skb), msg_type); + if (err == 1) { + err = 0; + ab = audit_log_start(NULL, GFP_KERNEL, msg_type); + if (ab) { + audit_log_format(ab, + "user pid=%d uid=%u auid=%u msg='%.1024s'", + pid, uid, loginuid, (char *)data); + audit_set_pid(ab, pid); + audit_log_end(ab); + } + } break; case AUDIT_ADD: case AUDIT_DEL: @@ -523,7 +522,7 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); audit_initialized = 1; audit_enabled = audit_default; - audit_log(NULL, AUDIT_KERNEL, "initialized"); + audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); return 0; } __initcall(audit_init); @@ -561,7 +560,7 @@ static void audit_buffer_free(struct audit_buffer *ab) } static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx, - int gfp_mask, int type) + unsigned int __nocast gfp_mask, int type) { unsigned long flags; struct audit_buffer *ab = NULL; @@ -587,6 +586,7 @@ static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx, goto err; ab->ctx = ctx; + ab->gfp_mask = gfp_mask; nlh = (struct nlmsghdr *)skb_put(ab->skb, NLMSG_SPACE(0)); nlh->nlmsg_type = type; nlh->nlmsg_flags = 0; @@ -606,26 +606,27 @@ err: * (timestamp,serial) tuple is unique for each syscall and is live from * syscall entry to syscall exit. * - * Atomic values are only guaranteed to be 24-bit, so we count down. - * * NOTE: Another possibility is to store the formatted records off the * audit context (for those records that have a context), and emit them * all at syscall exit. However, this could delay the reporting of * significant errors until syscall exit (or never, if the system * halts). */ + unsigned int audit_serial(void) { - static atomic_t serial = ATOMIC_INIT(0xffffff); - unsigned int a, b; + static spinlock_t serial_lock = SPIN_LOCK_UNLOCKED; + static unsigned int serial = 0; + + unsigned long flags; + unsigned int ret; + spin_lock_irqsave(&serial_lock, flags); do { - a = atomic_read(&serial); - if (atomic_dec_and_test(&serial)) - atomic_set(&serial, 0xffffff); - b = atomic_read(&serial); - } while (b != a - 1); + ret = ++serial; + } while (unlikely(!ret)); + spin_unlock_irqrestore(&serial_lock, flags); - return 0xffffff - b; + return ret; } static inline void audit_get_stamp(struct audit_context *ctx, @@ -645,17 +646,43 @@ static inline void audit_get_stamp(struct audit_context *ctx, * syscall, then the syscall is marked as auditable and an audit record * will be written at syscall exit. If there is no associated task, tsk * should be NULL. */ -struct audit_buffer *audit_log_start(struct audit_context *ctx, int type) + +struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask, + int type) { struct audit_buffer *ab = NULL; struct timespec t; unsigned int serial; + int reserve; + unsigned long timeout_start = jiffies; if (!audit_initialized) return NULL; - if (audit_backlog_limit - && skb_queue_len(&audit_skb_queue) > audit_backlog_limit) { + if (gfp_mask & __GFP_WAIT) + reserve = 0; + else + reserve = 5; /* Allow atomic callers to go up to five + entries over the normal backlog limit */ + + while (audit_backlog_limit + && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { + if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time + && time_before(jiffies, timeout_start + audit_backlog_wait_time)) { + + /* Wait for auditd to drain the queue a little */ + DECLARE_WAITQUEUE(wait, current); + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&audit_backlog_wait, &wait); + + if (audit_backlog_limit && + skb_queue_len(&audit_skb_queue) > audit_backlog_limit) + schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&audit_backlog_wait, &wait); + continue; + } if (audit_rate_check()) printk(KERN_WARNING "audit: audit_backlog=%d > " @@ -663,10 +690,12 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, int type) skb_queue_len(&audit_skb_queue), audit_backlog_limit); audit_log_lost("backlog limit exceeded"); + audit_backlog_wait_time = audit_backlog_wait_overflow; + wake_up(&audit_backlog_wait); return NULL; } - ab = audit_buffer_alloc(ctx, GFP_ATOMIC, type); + ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { audit_log_lost("out of memory in audit_log_start"); return NULL; @@ -690,7 +719,7 @@ static inline int audit_expand(struct audit_buffer *ab, int extra) { struct sk_buff *skb = ab->skb; int ret = pskb_expand_head(skb, skb_headroom(skb), extra, - GFP_ATOMIC); + ab->gfp_mask); if (ret < 0) { audit_log_lost("out of memory in audit_expand"); return 0; @@ -809,7 +838,7 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, audit_log_format(ab, " %s", prefix); /* We will allow 11 spaces for ' (deleted)' to be appended */ - path = kmalloc(PATH_MAX+11, GFP_KERNEL); + path = kmalloc(PATH_MAX+11, ab->gfp_mask); if (!path) { audit_log_format(ab, "<no memory>"); return; @@ -841,7 +870,7 @@ void audit_log_end(struct audit_buffer *ab) ab->skb = NULL; wake_up_interruptible(&kauditd_wait); } else { - printk("%s\n", ab->skb->data + NLMSG_SPACE(0)); + printk(KERN_NOTICE "%s\n", ab->skb->data + NLMSG_SPACE(0)); } } audit_buffer_free(ab); @@ -850,12 +879,13 @@ void audit_log_end(struct audit_buffer *ab) /* Log an audit record. This is a convenience function that calls * audit_log_start, audit_log_vformat, and audit_log_end. It may be * called in any context. */ -void audit_log(struct audit_context *ctx, int type, const char *fmt, ...) +void audit_log(struct audit_context *ctx, int gfp_mask, int type, + const char *fmt, ...) { struct audit_buffer *ab; va_list args; - ab = audit_log_start(ctx, type); + ab = audit_log_start(ctx, gfp_mask, type); if (ab) { va_start(args, fmt); audit_log_vformat(ab, fmt, args); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e75f84e1a1a..88696f639aa 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -39,6 +39,9 @@ #include <linux/audit.h> #include <linux/personality.h> #include <linux/time.h> +#include <linux/kthread.h> +#include <linux/netlink.h> +#include <linux/compiler.h> #include <asm/unistd.h> /* 0 = no checking @@ -95,6 +98,7 @@ struct audit_names { uid_t uid; gid_t gid; dev_t rdev; + unsigned flags; }; struct audit_aux_data { @@ -167,9 +171,16 @@ struct audit_context { /* There are three lists of rules -- one to search at task creation * time, one to search at syscall entry time, and another to search at * syscall exit time. */ -static LIST_HEAD(audit_tsklist); -static LIST_HEAD(audit_entlist); -static LIST_HEAD(audit_extlist); +static struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { + LIST_HEAD_INIT(audit_filter_list[0]), + LIST_HEAD_INIT(audit_filter_list[1]), + LIST_HEAD_INIT(audit_filter_list[2]), + LIST_HEAD_INIT(audit_filter_list[3]), + LIST_HEAD_INIT(audit_filter_list[4]), +#if AUDIT_NR_FILTERS != 5 +#error Fix audit_filter_list initialiser +#endif +}; struct audit_entry { struct list_head list; @@ -179,9 +190,36 @@ struct audit_entry { extern int audit_pid; +/* Copy rule from user-space to kernel-space. Called from + * audit_add_rule during AUDIT_ADD. */ +static inline int audit_copy_rule(struct audit_rule *d, struct audit_rule *s) +{ + int i; + + if (s->action != AUDIT_NEVER + && s->action != AUDIT_POSSIBLE + && s->action != AUDIT_ALWAYS) + return -1; + if (s->field_count < 0 || s->field_count > AUDIT_MAX_FIELDS) + return -1; + if ((s->flags & ~AUDIT_FILTER_PREPEND) >= AUDIT_NR_FILTERS) + return -1; + + d->flags = s->flags; + d->action = s->action; + d->field_count = s->field_count; + for (i = 0; i < d->field_count; i++) { + d->fields[i] = s->fields[i]; + d->values[i] = s->values[i]; + } + for (i = 0; i < AUDIT_BITMASK_SIZE; i++) d->mask[i] = s->mask[i]; + return 0; +} + /* Check to see if two rules are identical. It is called from + * audit_add_rule during AUDIT_ADD and * audit_del_rule during AUDIT_DEL. */ -static int audit_compare_rule(struct audit_rule *a, struct audit_rule *b) +static inline int audit_compare_rule(struct audit_rule *a, struct audit_rule *b) { int i; @@ -210,19 +248,37 @@ static int audit_compare_rule(struct audit_rule *a, struct audit_rule *b) /* Note that audit_add_rule and audit_del_rule are called via * audit_receive() in audit.c, and are protected by * audit_netlink_sem. */ -static inline int audit_add_rule(struct audit_entry *entry, - struct list_head *list) +static inline int audit_add_rule(struct audit_rule *rule, + struct list_head *list) { - if (entry->rule.flags & AUDIT_PREPEND) { - entry->rule.flags &= ~AUDIT_PREPEND; + struct audit_entry *entry; + + /* Do not use the _rcu iterator here, since this is the only + * addition routine. */ + list_for_each_entry(entry, list, list) { + if (!audit_compare_rule(rule, &entry->rule)) { + return -EEXIST; + } + } + + if (!(entry = kmalloc(sizeof(*entry), GFP_KERNEL))) + return -ENOMEM; + if (audit_copy_rule(&entry->rule, rule)) { + kfree(entry); + return -EINVAL; + } + + if (entry->rule.flags & AUDIT_FILTER_PREPEND) { + entry->rule.flags &= ~AUDIT_FILTER_PREPEND; list_add_rcu(&entry->list, list); } else { list_add_tail_rcu(&entry->list, list); } + return 0; } -static void audit_free_rule(struct rcu_head *head) +static inline void audit_free_rule(struct rcu_head *head) { struct audit_entry *e = container_of(head, struct audit_entry, rcu); kfree(e); @@ -245,82 +301,82 @@ static inline int audit_del_rule(struct audit_rule *rule, return 0; } } - return -EFAULT; /* No matching rule */ + return -ENOENT; /* No matching rule */ } -/* Copy rule from user-space to kernel-space. Called during - * AUDIT_ADD. */ -static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s) +static int audit_list_rules(void *_dest) { + int pid, seq; + int *dest = _dest; + struct audit_entry *entry; int i; - if (s->action != AUDIT_NEVER - && s->action != AUDIT_POSSIBLE - && s->action != AUDIT_ALWAYS) - return -1; - if (s->field_count < 0 || s->field_count > AUDIT_MAX_FIELDS) - return -1; + pid = dest[0]; + seq = dest[1]; + kfree(dest); - d->flags = s->flags; - d->action = s->action; - d->field_count = s->field_count; - for (i = 0; i < d->field_count; i++) { - d->fields[i] = s->fields[i]; - d->values[i] = s->values[i]; + down(&audit_netlink_sem); + + /* The *_rcu iterators not needed here because we are + always called with audit_netlink_sem held. */ + for (i=0; i<AUDIT_NR_FILTERS; i++) { + list_for_each_entry(entry, &audit_filter_list[i], list) + audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, + &entry->rule, sizeof(entry->rule)); } - for (i = 0; i < AUDIT_BITMASK_SIZE; i++) d->mask[i] = s->mask[i]; + audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); + + up(&audit_netlink_sem); return 0; } int audit_receive_filter(int type, int pid, int uid, int seq, void *data, uid_t loginuid) { - u32 flags; - struct audit_entry *entry; + struct task_struct *tsk; + int *dest; int err = 0; + unsigned listnr; switch (type) { case AUDIT_LIST: - /* The *_rcu iterators not needed here because we are - always called with audit_netlink_sem held. */ - list_for_each_entry(entry, &audit_tsklist, list) - audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, - &entry->rule, sizeof(entry->rule)); - list_for_each_entry(entry, &audit_entlist, list) - audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, - &entry->rule, sizeof(entry->rule)); - list_for_each_entry(entry, &audit_extlist, list) - audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, - &entry->rule, sizeof(entry->rule)); - audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); + /* We can't just spew out the rules here because we might fill + * the available socket buffer space and deadlock waiting for + * auditctl to read from it... which isn't ever going to + * happen if we're actually running in the context of auditctl + * trying to _send_ the stuff */ + + dest = kmalloc(2 * sizeof(int), GFP_KERNEL); + if (!dest) + return -ENOMEM; + dest[0] = pid; + dest[1] = seq; + + tsk = kthread_run(audit_list_rules, dest, "audit_list_rules"); + if (IS_ERR(tsk)) { + kfree(dest); + err = PTR_ERR(tsk); + } break; case AUDIT_ADD: - if (!(entry = kmalloc(sizeof(*entry), GFP_KERNEL))) - return -ENOMEM; - if (audit_copy_rule(&entry->rule, data)) { - kfree(entry); + listnr =((struct audit_rule *)data)->flags & ~AUDIT_FILTER_PREPEND; + if (listnr >= AUDIT_NR_FILTERS) return -EINVAL; - } - flags = entry->rule.flags; - if (!err && (flags & AUDIT_PER_TASK)) - err = audit_add_rule(entry, &audit_tsklist); - if (!err && (flags & AUDIT_AT_ENTRY)) - err = audit_add_rule(entry, &audit_entlist); - if (!err && (flags & AUDIT_AT_EXIT)) - err = audit_add_rule(entry, &audit_extlist); - audit_log(NULL, AUDIT_CONFIG_CHANGE, - "auid=%u added an audit rule\n", loginuid); + + err = audit_add_rule(data, &audit_filter_list[listnr]); + if (!err) + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u added an audit rule\n", loginuid); break; case AUDIT_DEL: - flags =((struct audit_rule *)data)->flags; - if (!err && (flags & AUDIT_PER_TASK)) - err = audit_del_rule(data, &audit_tsklist); - if (!err && (flags & AUDIT_AT_ENTRY)) - err = audit_del_rule(data, &audit_entlist); - if (!err && (flags & AUDIT_AT_EXIT)) - err = audit_del_rule(data, &audit_extlist); - audit_log(NULL, AUDIT_CONFIG_CHANGE, - "auid=%u removed an audit rule\n", loginuid); + listnr =((struct audit_rule *)data)->flags & ~AUDIT_FILTER_PREPEND; + if (listnr >= AUDIT_NR_FILTERS) + return -EINVAL; + + err = audit_del_rule(data, &audit_filter_list[listnr]); + if (!err) + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u removed an audit rule\n", loginuid); break; default: return -EINVAL; @@ -384,8 +440,12 @@ static int audit_filter_rules(struct task_struct *tsk, result = (ctx->return_code == value); break; case AUDIT_SUCCESS: - if (ctx && ctx->return_valid) - result = (ctx->return_valid == AUDITSC_SUCCESS); + if (ctx && ctx->return_valid) { + if (value) + result = (ctx->return_valid == AUDITSC_SUCCESS); + else + result = (ctx->return_valid == AUDITSC_FAILURE); + } break; case AUDIT_DEVMAJOR: if (ctx) { @@ -454,7 +514,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk) enum audit_state state; rcu_read_lock(); - list_for_each_entry_rcu(e, &audit_tsklist, list) { + list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { rcu_read_unlock(); return state; @@ -474,20 +534,84 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, struct list_head *list) { struct audit_entry *e; + enum audit_state state; + + if (audit_pid && tsk->tgid == audit_pid) + return AUDIT_DISABLED; + + rcu_read_lock(); + if (!list_empty(list)) { + int word = AUDIT_WORD(ctx->major); + int bit = AUDIT_BIT(ctx->major); + + list_for_each_entry_rcu(e, list, list) { + if ((e->rule.mask[word] & bit) == bit + && audit_filter_rules(tsk, &e->rule, ctx, &state)) { + rcu_read_unlock(); + return state; + } + } + } + rcu_read_unlock(); + return AUDIT_BUILD_CONTEXT; +} + +static int audit_filter_user_rules(struct netlink_skb_parms *cb, + struct audit_rule *rule, + enum audit_state *state) +{ + int i; + + for (i = 0; i < rule->field_count; i++) { + u32 field = rule->fields[i] & ~AUDIT_NEGATE; + u32 value = rule->values[i]; + int result = 0; + + switch (field) { + case AUDIT_PID: + result = (cb->creds.pid == value); + break; + case AUDIT_UID: + result = (cb->creds.uid == value); + break; + case AUDIT_GID: + result = (cb->creds.gid == value); + break; + case AUDIT_LOGINUID: + result = (cb->loginuid == value); + break; + } + + if (rule->fields[i] & AUDIT_NEGATE) + result = !result; + if (!result) + return 0; + } + switch (rule->action) { + case AUDIT_NEVER: *state = AUDIT_DISABLED; break; + case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break; + case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; + } + return 1; +} + +int audit_filter_user(struct netlink_skb_parms *cb, int type) +{ + struct audit_entry *e; enum audit_state state; - int word = AUDIT_WORD(ctx->major); - int bit = AUDIT_BIT(ctx->major); + int ret = 1; rcu_read_lock(); - list_for_each_entry_rcu(e, list, list) { - if ((e->rule.mask[word] & bit) == bit - && audit_filter_rules(tsk, &e->rule, ctx, &state)) { - rcu_read_unlock(); - return state; + list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) { + if (audit_filter_user_rules(cb, &e->rule, &state)) { + if (state == AUDIT_DISABLED) + ret = 0; + break; } } rcu_read_unlock(); - return AUDIT_BUILD_CONTEXT; + + return ret; /* Audit by default */ } /* This should be called with task_lock() held. */ @@ -504,7 +628,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, if (context->in_syscall && !context->auditable) { enum audit_state state; - state = audit_filter_syscall(tsk, context, &audit_extlist); + state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); if (state == AUDIT_RECORD_CONTEXT) context->auditable = 1; } @@ -679,13 +803,13 @@ static void audit_log_task_info(struct audit_buffer *ab) up_read(&mm->mmap_sem); } -static void audit_log_exit(struct audit_context *context) +static void audit_log_exit(struct audit_context *context, unsigned int gfp_mask) { int i; struct audit_buffer *ab; struct audit_aux_data *aux; - ab = audit_log_start(context, AUDIT_SYSCALL); + ab = audit_log_start(context, gfp_mask, AUDIT_SYSCALL); if (!ab) return; /* audit_panic has been called */ audit_log_format(ab, "arch=%x syscall=%d", @@ -717,7 +841,7 @@ static void audit_log_exit(struct audit_context *context) for (aux = context->aux; aux; aux = aux->next) { - ab = audit_log_start(context, aux->type); + ab = audit_log_start(context, GFP_KERNEL, aux->type); if (!ab) continue; /* audit_panic has been called */ @@ -754,14 +878,14 @@ static void audit_log_exit(struct audit_context *context) } if (context->pwd && context->pwdmnt) { - ab = audit_log_start(context, AUDIT_CWD); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); if (ab) { audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); audit_log_end(ab); } } for (i = 0; i < context->name_count; i++) { - ab = audit_log_start(context, AUDIT_PATH); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); if (!ab) continue; /* audit_panic has been called */ @@ -770,6 +894,8 @@ static void audit_log_exit(struct audit_context *context) audit_log_format(ab, " name="); audit_log_untrustedstring(ab, context->names[i].name); } + audit_log_format(ab, " flags=%x\n", context->names[i].flags); + if (context->names[i].ino != (unsigned long)-1) audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o" " ouid=%u ogid=%u rdev=%02x:%02x", @@ -799,9 +925,11 @@ void audit_free(struct task_struct *tsk) return; /* Check for system calls that do not go through the exit - * function (e.g., exit_group), then free context block. */ - if (context->in_syscall && context->auditable && context->pid != audit_pid) - audit_log_exit(context); + * function (e.g., exit_group), then free context block. + * We use GFP_ATOMIC here because we might be doing this + * in the context of the idle thread */ + if (context->in_syscall && context->auditable) + audit_log_exit(context, GFP_ATOMIC); audit_free_context(context); } @@ -876,11 +1004,11 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major, state = context->state; if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT) - state = audit_filter_syscall(tsk, context, &audit_entlist); + state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]); if (likely(state == AUDIT_DISABLED)) return; - context->serial = audit_serial(); + context->serial = 0; context->ctime = CURRENT_TIME; context->in_syscall = 1; context->auditable = !!(state == AUDIT_RECORD_CONTEXT); @@ -903,10 +1031,10 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) /* Not having a context here is ok, since the parent may have * called __put_task_struct. */ if (likely(!context)) - return; + goto out; - if (context->in_syscall && context->auditable && context->pid != audit_pid) - audit_log_exit(context); + if (context->in_syscall && context->auditable) + audit_log_exit(context, GFP_KERNEL); context->in_syscall = 0; context->auditable = 0; @@ -919,9 +1047,9 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) } else { audit_free_names(context); audit_free_aux(context); - audit_zero_context(context, context->state); tsk->audit_context = context; } + out: put_task_struct(tsk); } @@ -996,7 +1124,7 @@ void audit_putname(const char *name) /* Store the inode and device from a lookup. Called from * fs/namei.c:path_lookup(). */ -void audit_inode(const char *name, const struct inode *inode) +void audit_inode(const char *name, const struct inode *inode, unsigned flags) { int idx; struct audit_context *context = current->audit_context; @@ -1022,17 +1150,20 @@ void audit_inode(const char *name, const struct inode *inode) ++context->ino_count; #endif } - context->names[idx].ino = inode->i_ino; - context->names[idx].dev = inode->i_sb->s_dev; - context->names[idx].mode = inode->i_mode; - context->names[idx].uid = inode->i_uid; - context->names[idx].gid = inode->i_gid; - context->names[idx].rdev = inode->i_rdev; + context->names[idx].flags = flags; + context->names[idx].ino = inode->i_ino; + context->names[idx].dev = inode->i_sb->s_dev; + context->names[idx].mode = inode->i_mode; + context->names[idx].uid = inode->i_uid; + context->names[idx].gid = inode->i_gid; + context->names[idx].rdev = inode->i_rdev; } void auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { + if (!ctx->serial) + ctx->serial = audit_serial(); t->tv_sec = ctx->ctime.tv_sec; t->tv_nsec = ctx->ctime.tv_nsec; *serial = ctx->serial; @@ -1044,7 +1175,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) if (task->audit_context) { struct audit_buffer *ab; - ab = audit_log_start(NULL, AUDIT_LOGIN); + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); if (ab) { audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u", @@ -1153,7 +1284,7 @@ void audit_signal_info(int sig, struct task_struct *t) extern pid_t audit_sig_pid; extern uid_t audit_sig_uid; - if (unlikely(audit_pid && t->pid == audit_pid)) { + if (unlikely(audit_pid && t->tgid == audit_pid)) { if (sig == SIGTERM || sig == SIGHUP) { struct audit_context *ctx = current->audit_context; audit_sig_pid = current->pid; diff --git a/kernel/exit.c b/kernel/exit.c index 6d2089a1bce..ee6d8b8abef 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -371,6 +371,12 @@ static inline void close_files(struct files_struct * files) struct fdtable *fdt; j = 0; + + /* + * It is safe to dereference the fd table without RCU or + * ->file_lock because this is the last reference to the + * files structure. + */ fdt = files_fdtable(files); for (;;) { unsigned long set; diff --git a/kernel/fork.c b/kernel/fork.c index 8149f360288..533ce27f4b2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1062,7 +1062,8 @@ static task_t *copy_process(unsigned long clone_flags, * parent's CPU). This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; - if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed))) + if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || + !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* diff --git a/kernel/module.c b/kernel/module.c index 4b39d3793c7..ff5c500ab62 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/moduleloader.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/elf.h> @@ -498,7 +499,7 @@ static inline int try_force(unsigned int flags) { int ret = (flags & O_TRUNC); if (ret) - tainted |= TAINT_FORCED_MODULE; + add_taint(TAINT_FORCED_MODULE); return ret; } #else @@ -897,7 +898,7 @@ static int check_version(Elf_Shdr *sechdrs, if (!(tainted & TAINT_FORCED_MODULE)) { printk("%s: no version for \"%s\" found: kernel tainted.\n", mod->name, symname); - tainted |= TAINT_FORCED_MODULE; + add_taint(TAINT_FORCED_MODULE); } return 1; } @@ -1352,7 +1353,7 @@ static void set_license(struct module *mod, const char *license) if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) { printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", mod->name, license); - tainted |= TAINT_PROPRIETARY_MODULE; + add_taint(TAINT_PROPRIETARY_MODULE); } } @@ -1610,7 +1611,7 @@ static struct module *load_module(void __user *umod, modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { - tainted |= TAINT_FORCED_MODULE; + add_taint(TAINT_FORCED_MODULE); printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", mod->name); } else if (!same_magic(modmagic, vermagic)) { @@ -1739,7 +1740,7 @@ static struct module *load_module(void __user *umod, (mod->num_gpl_syms && !gplcrcindex)) { printk(KERN_WARNING "%s: No versions for exported symbols." " Tainting kernel.\n", mod->name); - tainted |= TAINT_FORCED_MODULE; + add_taint(TAINT_FORCED_MODULE); } #endif diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 396c7873e80..46a5e5acff9 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -29,7 +29,7 @@ config PM_DEBUG config SOFTWARE_SUSPEND bool "Software Suspend" - depends on PM && SWAP && (X86 || ((FVR || PPC32) && !SMP)) + depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FVR || PPC32) && !SMP) ---help--- Enable the possibility of suspending the machine. It doesn't need APM. diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 2d8bf054d03..761956e813f 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -17,12 +17,12 @@ #include <linux/delay.h> #include <linux/fs.h> #include <linux/mount.h> +#include <linux/pm.h> #include "power.h" extern suspend_disk_method_t pm_disk_mode; -extern struct pm_ops * pm_ops; extern int swsusp_suspend(void); extern int swsusp_write(void); @@ -49,13 +49,11 @@ dev_t swsusp_resume_device; static void power_down(suspend_disk_method_t mode) { - unsigned long flags; int error = 0; - local_irq_save(flags); switch(mode) { case PM_DISK_PLATFORM: - device_shutdown(); + kernel_power_off_prepare(); error = pm_ops->enter(PM_SUSPEND_DISK); break; case PM_DISK_SHUTDOWN: diff --git a/kernel/power/power.h b/kernel/power/power.h index cd6a3493cc0..9c9167d910d 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -1,7 +1,7 @@ #include <linux/suspend.h> #include <linux/utsname.h> -/* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but +/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but we probably do not take enough locks for switching consoles, etc, so bad things might happen. */ diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index d967e875ee8..1cc9ff25e47 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -363,7 +363,7 @@ static void lock_swapdevices(void) } /** - * write_swap_page - Write one page to a fresh swap location. + * write_page - Write one page to a fresh swap location. * @addr: Address we're writing. * @loc: Place to store the entry we used. * @@ -863,6 +863,9 @@ static int alloc_image_pages(void) return 0; } +/* Free pages we allocated for suspend. Suspend pages are alocated + * before atomic copy, so we need to free them after resume. + */ void swsusp_free(void) { BUG_ON(PageNosave(virt_to_page(pagedir_save))); @@ -918,6 +921,7 @@ static int swsusp_alloc(void) pagedir_nosave = NULL; nr_copy_pages = calc_nr(nr_copy_pages); + nr_copy_pages_check = nr_copy_pages; pr_debug("suspend: (pages needed: %d + %d free: %d)\n", nr_copy_pages, PAGES_FOR_IO, nr_free_pages()); @@ -940,7 +944,6 @@ static int swsusp_alloc(void) return error; } - nr_copy_pages_check = nr_copy_pages; return 0; } @@ -1213,8 +1216,9 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) free_pagedir(pblist); free_eaten_memory(); pblist = NULL; - } - else + /* Is this even worth handling? It should never ever happen, and we + have just lost user's state, anyway... */ + } else printk("swsusp: Relocated %d pages\n", rel); return pblist; diff --git a/kernel/printk.c b/kernel/printk.c index a967605bc2e..4b8f0f9230a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -488,6 +488,11 @@ static int __init printk_time_setup(char *str) __setup("time", printk_time_setup); +__attribute__((weak)) unsigned long long printk_clock(void) +{ + return sched_clock(); +} + /* * This is printk. It can be called from any context. We want it to work. * @@ -565,7 +570,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) loglev_char = default_message_loglevel + '0'; } - t = sched_clock(); + t = printk_clock(); nanosec_rem = do_div(t, 1000000000); tlen = sprintf(tbuf, "<%c>[%5lu.%06lu] ", diff --git a/kernel/sched.c b/kernel/sched.c index 81b3a96ed2d..1f31a528fdb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -294,6 +294,10 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) { +#ifdef CONFIG_DEBUG_SPINLOCK + /* this is a valid case when another task releases the spinlock */ + rq->lock.owner = current; +#endif spin_unlock_irq(&rq->lock); } @@ -1529,10 +1533,6 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev) * Manfred Spraul <manfred@colorfullife.com> */ prev_task_flags = prev->flags; -#ifdef CONFIG_DEBUG_SPINLOCK - /* this is a valid case when another task releases the spinlock */ - rq->lock.owner = current; -#endif finish_arch_switch(prev); finish_lock_switch(rq, prev); if (mm) diff --git a/kernel/signal.c b/kernel/signal.c index b92c3c9f8b9..5a274705ba1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -936,34 +936,31 @@ force_sig_specific(int sig, struct task_struct *t) * as soon as they're available, so putting the signal on the shared queue * will be equivalent to sending it to one such thread. */ -#define wants_signal(sig, p, mask) \ - (!sigismember(&(p)->blocked, sig) \ - && !((p)->state & mask) \ - && !((p)->flags & PF_EXITING) \ - && (task_curr(p) || !signal_pending(p))) - +static inline int wants_signal(int sig, struct task_struct *p) +{ + if (sigismember(&p->blocked, sig)) + return 0; + if (p->flags & PF_EXITING) + return 0; + if (sig == SIGKILL) + return 1; + if (p->state & (TASK_STOPPED | TASK_TRACED)) + return 0; + return task_curr(p) || !signal_pending(p); +} static void __group_complete_signal(int sig, struct task_struct *p) { - unsigned int mask; struct task_struct *t; /* - * Don't bother traced and stopped tasks (but - * SIGKILL will punch through that). - */ - mask = TASK_STOPPED | TASK_TRACED; - if (sig == SIGKILL) - mask = 0; - - /* * Now find a thread we can wake up to take the signal off the queue. * * If the main thread wants the signal, it gets first crack. * Probably the least surprising to the average bear. */ - if (wants_signal(sig, p, mask)) + if (wants_signal(sig, p)) t = p; else if (thread_group_empty(p)) /* @@ -981,7 +978,7 @@ __group_complete_signal(int sig, struct task_struct *p) t = p->signal->curr_target = p; BUG_ON(t->tgid != p->tgid); - while (!wants_signal(sig, t, mask)) { + while (!wants_signal(sig, t)) { t = next_thread(t); if (t == p->signal->curr_target) /* diff --git a/kernel/sys.c b/kernel/sys.c index c80412be230..2fa1ed18123 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -361,17 +361,35 @@ out_unlock: return retval; } +/** + * emergency_restart - reboot the system + * + * Without shutting down any hardware or taking any locks + * reboot the system. This is called when we know we are in + * trouble so this is our best effort to reboot. This is + * safe to call in interrupt context. + */ void emergency_restart(void) { machine_emergency_restart(); } EXPORT_SYMBOL_GPL(emergency_restart); -void kernel_restart(char *cmd) +/** + * kernel_restart - reboot the system + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. + */ +void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); +} +void kernel_restart(char *cmd) +{ + kernel_restart_prepare(cmd); if (!cmd) { printk(KERN_EMERG "Restarting system.\n"); } else { @@ -382,6 +400,12 @@ void kernel_restart(char *cmd) } EXPORT_SYMBOL_GPL(kernel_restart); +/** + * kernel_kexec - reboot the system + * + * Move into place and start executing a preloaded standalone + * executable. If nothing was preloaded return an error. + */ void kernel_kexec(void) { #ifdef CONFIG_KEXEC @@ -390,9 +414,7 @@ void kernel_kexec(void) if (!image) { return; } - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - system_state = SYSTEM_RESTART; - device_shutdown(); + kernel_restart_prepare(NULL); printk(KERN_EMERG "Starting new kernel\n"); machine_shutdown(); machine_kexec(image); @@ -400,21 +422,39 @@ void kernel_kexec(void) } EXPORT_SYMBOL_GPL(kernel_kexec); -void kernel_halt(void) +/** + * kernel_halt - halt the system + * + * Shutdown everything and perform a clean system halt. + */ +void kernel_halt_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); system_state = SYSTEM_HALT; device_shutdown(); +} +void kernel_halt(void) +{ + kernel_halt_prepare(); printk(KERN_EMERG "System halted.\n"); machine_halt(); } EXPORT_SYMBOL_GPL(kernel_halt); -void kernel_power_off(void) +/** + * kernel_power_off - power_off the system + * + * Shutdown everything and perform a clean system power_off. + */ +void kernel_power_off_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); system_state = SYSTEM_POWER_OFF; device_shutdown(); +} +void kernel_power_off(void) +{ + kernel_power_off_prepare(); printk(KERN_EMERG "Power down.\n"); machine_power_off(); } @@ -1728,8 +1768,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = put_user(current->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - if (current->mm->dumpable) - error = 1; + error = current->mm->dumpable; break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 2) { diff --git a/kernel/timer.c b/kernel/timer.c index f4152fcd9f8..3ba10fa35b6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1151,19 +1151,22 @@ fastcall signed long __sched schedule_timeout(signed long timeout) out: return timeout < 0 ? 0 : timeout; } - EXPORT_SYMBOL(schedule_timeout); +/* + * We can use __set_current_state() here because schedule_timeout() calls + * schedule() unconditionally. + */ signed long __sched schedule_timeout_interruptible(signed long timeout) { - set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_INTERRUPTIBLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_interruptible); signed long __sched schedule_timeout_uninterruptible(signed long timeout) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_uninterruptible); |