diff options
author | Jan Safranek <jsafrane@redhat.com> | 2009-03-13 15:16:19 +0100 |
---|---|---|
committer | Jan Safranek <jsafrane@redhat.com> | 2009-03-26 09:34:18 +0100 |
commit | f8e05fc8c129a13fed256b03a23537ef94c77152 (patch) | |
tree | c64ea7d9f7daeefd307feec1bcb90ea5e3e6d600 /src/api.c | |
parent | 04bb98f8bd9751dd8a514b0e3a6c4862ceabeae9 (diff) | |
download | libcg-f8e05fc8c129a13fed256b03a23537ef94c77152.tar.gz libcg-f8e05fc8c129a13fed256b03a23537ef94c77152.tar.xz libcg-f8e05fc8c129a13fed256b03a23537ef94c77152.zip |
Distribute files to various subdirectories
Signed-off-by: Jan Safranek <jsafrane@redhat.com>
Diffstat (limited to 'src/api.c')
-rw-r--r-- | src/api.c | 2320 |
1 files changed, 2320 insertions, 0 deletions
diff --git a/src/api.c b/src/api.c new file mode 100644 index 0000000..28365b4 --- /dev/null +++ b/src/api.c @@ -0,0 +1,2320 @@ +/* + * Copyright IBM Corporation. 2007 + * + * Author: Dhaval Giani <dhaval@linux.vnet.ibm.com> + * Author: Balbir Singh <balbir@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * TODOs: + * 1. Convert comments to Docbook style. + * 2. Add more APIs for the control groups. + * 3. Handle the configuration related APIs. + * 4. Error handling. + * + * Code initiated and designed by Dhaval Giani. All faults are most likely + * his mistake. + */ + +#include <dirent.h> +#include <errno.h> +#include <libcgroup.h> +#include <libcgroup-internal.h> +#include <mntent.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <fts.h> +#include <ctype.h> +#include <pwd.h> +#include <libgen.h> +#include <assert.h> + +#ifndef PACKAGE_VERSION +#define PACKAGE_VERSION 0.01 +#endif + +#define VERSION(ver) #ver + +/* + * The errno which happend the last time (have to be thread specific) + */ +__thread int last_errno; + +#define MAXLEN 256 + +/* the value have to be thread specific */ +__thread char errtext[MAXLEN]; + +/* + * Remember to bump this up for major API changes. + */ +const static char cg_version[] = VERSION(PACKAGE_VERSION); + +struct cg_mount_table_s cg_mount_table[CG_CONTROLLER_MAX]; +static pthread_rwlock_t cg_mount_table_lock = PTHREAD_RWLOCK_INITIALIZER; + +/* Check if cgroup_init has been called or not. */ +static int cgroup_initialized; + +/* Check if the rules cache has been loaded or not. */ +static bool cgroup_rules_loaded; + +/* List of configuration rules */ +static struct cgroup_rule_list rl; + +/* Temporary list of configuration rules (for non-cache apps) */ +static struct cgroup_rule_list trl; + +/* Lock for the list of rules (rl) */ +static pthread_rwlock_t rl_lock = PTHREAD_RWLOCK_INITIALIZER; + +char *cgroup_strerror_codes[] = { + "Cgroup is not compiled in", + "Cgroup is not mounted", + "Cgroup does not exist", + "Cgroup has not been created", + "Cgroup one of the needed subsystems is not mounted", + "Cgroup, request came in from non owner", + "Cgroup controllers controllers are bound to different mount points", + "Cgroup, operation not allowed", + "Cgroup value set exceeds maximum", + "Cgroup controller already exists", + "Cgroup value already exists", + "Cgroup invalid operation", + "Cgroup, creation of controller failed", + "Cgroup operation failed", + "Cgroup not initialized", + "Cgroup trying to set value for control that does not exist", + "Cgroup generic error, see errno", + "Cgroup values are not equal", + "Cgroup controllers are different", + "Cgroup parsing failed", + "Cgroup, rules file does not exist", + "Cgroup mounting failed", + "The config file can not be opend", + "End of File or iterator", +}; + +static int cg_chown_file(FTS *fts, FTSENT *ent, uid_t owner, gid_t group) +{ + int ret = 0; + const char *filename = fts->fts_path; + cgroup_dbg("seeing file %s\n", filename); + switch (ent->fts_info) { + case FTS_ERR: + errno = ent->fts_errno; + break; + case FTS_D: + case FTS_DC: + case FTS_NSOK: + case FTS_NS: + case FTS_DNR: + case FTS_DP: + ret = chown(filename, owner, group); + if (ret) + goto fail_chown; + ret = chmod(filename, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | + S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH); + break; + case FTS_F: + case FTS_DEFAULT: + ret = chown(filename, owner, group); + if (ret) + goto fail_chown; + ret = chmod(filename, S_IRUSR | S_IWUSR | S_IRGRP | + S_IWGRP | S_IROTH); + break; + } +fail_chown: + if (ret < 0) { + last_errno = errno; + ret = ECGOTHER; + } + return ret; +} + +/* + * TODO: Need to decide a better place to put this function. + */ +static int cg_chown_recursive(char **path, uid_t owner, gid_t group) +{ + int ret = 0; + cgroup_dbg("path is %s\n", *path); + FTS *fts = fts_open(path, FTS_PHYSICAL | FTS_NOCHDIR | + FTS_NOSTAT, NULL); + while (1) { + FTSENT *ent; + ent = fts_read(fts); + if (!ent) { + cgroup_dbg("fts_read failed\n"); + break; + } + ret = cg_chown_file(fts, ent, owner, group); + } + fts_close(fts); + return ret; +} + +static int cgroup_test_subsys_mounted(const char *name) +{ + int i; + + pthread_rwlock_rdlock(&cg_mount_table_lock); + + for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) { + if (strncmp(cg_mount_table[i].name, name, + sizeof(cg_mount_table[i].name)) == 0) { + pthread_rwlock_unlock(&cg_mount_table_lock); + return 1; + } + } + pthread_rwlock_unlock(&cg_mount_table_lock); + return 0; +} + +/** + * Free a single cgroup_rule struct. + * @param r The rule to free from memory + */ +static void cgroup_free_rule(struct cgroup_rule *r) +{ + /* Loop variable */ + int i = 0; + + /* Make sure our rule is not NULL, first. */ + if (!r) { + cgroup_dbg("Warning: Attempted to free NULL rule.\n"); + return; + } + + /* We must free any used controller strings, too. */ + for(i = 0; i < MAX_MNT_ELEMENTS; i++) { + if (r->controllers[i]) + free(r->controllers[i]); + } + + free(r); +} + +/** + * Free a list of cgroup_rule structs. If rl is the main list of rules, + * the lock must be taken for writing before calling this function! + * @param rl Pointer to the list of rules to free from memory + */ +static void cgroup_free_rule_list(struct cgroup_rule_list *rl) +{ + /* Temporary pointer */ + struct cgroup_rule *tmp = NULL; + + /* Make sure we're not freeing NULL memory! */ + if (!(rl->head)) { + cgroup_dbg("Warning: Attempted to free NULL list.\n"); + return; + } + + while (rl->head) { + tmp = rl->head; + rl->head = tmp->next; + cgroup_free_rule(tmp); + } + + /* Don't leave wild pointers around! */ + rl->head = NULL; + rl->tail = NULL; +} + +/** + * Parse the configuration file that maps UID/GIDs to cgroups. If ever the + * configuration file is modified, applications should call this function to + * load the new configuration rules. The function caller is responsible for + * calling free() on each rule in the list. + * + * The cache parameter alters the behavior of this function. If true, this + * function will read the entire configuration file and store the results in + * rl (global rules list). If false, this function will only parse until it + * finds a rule matching the given UID or GID. It will store this rule in rl, + * as well as any children rules (rules that begin with a %) that it has. + * + * This function is NOT thread safe! + * @param cache True to cache rules, else false + * @param muid If cache is false, the UID to match against + * @param mgid If cache is false, the GID to match against + * @return 0 on success, -1 if no cache and match found, > 0 on error. + * TODO: Make this function thread safe! + */ +static int cgroup_parse_rules(bool cache, uid_t muid, gid_t mgid) +{ + /* File descriptor for the configuration file */ + FILE *fp = NULL; + + /* Buffer to store the line we're working on */ + char *buff = NULL; + + /* Iterator for the line we're working on */ + char *itr = NULL; + + /* Pointer to the list that we're using */ + struct cgroup_rule_list *lst = NULL; + + /* Rule to add to the list */ + struct cgroup_rule *newrule = NULL; + + /* Structure to get GID from group name */ + struct group *grp = NULL; + + /* Structure to get UID from user name */ + struct passwd *pwd = NULL; + + /* Temporary storage for a configuration rule */ + char user[LOGIN_NAME_MAX] = { '\0' }; + char controllers[CG_CONTROLLER_MAX] = { '\0' }; + char destination[FILENAME_MAX] = { '\0' }; + uid_t uid = CGRULE_INVALID; + gid_t gid = CGRULE_INVALID; + + /* The current line number */ + unsigned int linenum = 0; + + /* Did we skip the previous line? */ + bool skipped = false; + + /* Have we found a matching rule (non-cache mode)? */ + bool matched = false; + + /* Return codes */ + int ret = 0; + + /* Temporary buffer for strtok() */ + char *stok_buff = NULL; + + /* Loop variable. */ + int i = 0; + + /* Open the configuration file. */ + pthread_rwlock_wrlock(&rl_lock); + fp = fopen(CGRULES_CONF_FILE, "r"); + if (!fp) { + cgroup_dbg("Failed to open configuration file %s with" + " error: %s\n", CGRULES_CONF_FILE, + strerror(errno)); + last_errno = errno; + goto finish; + } + + buff = calloc(CGROUP_RULE_MAXLINE, sizeof(char)); + if (!buff) { + cgroup_dbg("Out of memory? Error: %s\n", strerror(errno)); + last_errno = errno; + ret = ECGOTHER; + goto close_unlock; + } + + /* Determine which list we're using. */ + if (cache) + lst = &rl; + else + lst = &trl; + + /* If our list already exists, clean it. */ + if (lst->head) + cgroup_free_rule_list(lst); + + /* Now, parse the configuration file one line at a time. */ + cgroup_dbg("Parsing configuration file.\n"); + while ((itr = fgets(buff, CGROUP_RULE_MAXLINE, fp)) != NULL) { + linenum++; + + /* We ignore anything after a # sign as comments. */ + if ((itr = strchr(buff, '#'))) + *itr = '\0'; + + /* We also need to remove the newline character. */ + if ((itr = strchr(buff, '\n'))) + *itr = '\0'; + + /* Now, skip any leading tabs and spaces. */ + itr = buff; + while (itr && isblank(*itr)) + itr++; + + /* If there's nothing left, we can ignore this line. */ + if (!strlen(itr)) + continue; + + /* + * If we skipped the last rule and this rule is a continuation + * of it (begins with %), then we should skip this rule too. + */ + if (skipped && *itr == '%') { + cgroup_dbg("Warning: Skipped child of invalid rule," + " line %d.\n", linenum); + memset(buff, '\0', CGROUP_RULE_MAXLINE); + continue; + } + + /* + * If there is something left, it should be a rule. Otherwise, + * there's an error in the configuration file. + */ + skipped = false; + memset(user, '\0', LOGIN_NAME_MAX); + memset(controllers, '\0', CG_CONTROLLER_MAX); + memset(destination, '\0', FILENAME_MAX); + i = sscanf(itr, "%s%s%s", user, controllers, destination); + if (i != 3) { + cgroup_dbg("Failed to parse configuration file on" + " line %d.\n", linenum); + goto parsefail; + } + + /* + * Next, check the user/group. If it's a % sign, then we + * are continuing another rule and UID/GID should not be + * reset. If it's a @, we're dealing with a GID rule. If + * it's a *, then we do not need to do a lookup because the + * rule always applies (it's a wildcard). If we're using + * non-cache mode and we've found a matching rule, we only + * continue to parse if we're looking at a child rule. + */ + if ((!cache) && matched && (strncmp(user, "%", 1) != 0)) { + /* If we make it here, we finished (non-cache). */ + cgroup_dbg("Parsing of configuration file" + " complete.\n\n"); + ret = -1; + goto cleanup; + } + if (strncmp(user, "@", 1) == 0) { + /* New GID rule. */ + itr = &(user[1]); + if ((grp = getgrnam(itr))) { + uid = CGRULE_INVALID; + gid = grp->gr_gid; + } else { + cgroup_dbg("Warning: Entry for %s not" + "found. Skipping rule on line" + " %d.\n", itr, linenum); + memset(buff, '\0', CGROUP_RULE_MAXLINE); + skipped = true; + continue; + } + } else if (strncmp(user, "*", 1) == 0) { + /* Special wildcard rule. */ + uid = CGRULE_WILD; + gid = CGRULE_WILD; + } else if (*itr != '%') { + /* New UID rule. */ + if ((pwd = getpwnam(user))) { + uid = pwd->pw_uid; + gid = CGRULE_INVALID; + } else { + cgroup_dbg("Warning: Entry for %s not" + "found. Skipping rule on line" + " %d.\n", user, linenum); + memset(buff, '\0', CGROUP_RULE_MAXLINE); + skipped = true; + continue; + } + } /* Else, we're continuing another rule (UID/GID are okay). */ + + /* + * If we are not caching rules, then we need to check for a + * match before doing anything else. We consider four cases: + * The UID matches, the GID matches, the UID is a member of the + * GID, or we're looking at the wildcard rule, which always + * matches. If none of these are true, we simply continue to + * the next line in the file. + */ + if (grp && muid != CGRULE_INVALID) { + pwd = getpwuid(muid); + for (i = 0; grp->gr_mem[i]; i++) { + if (!(strcmp(pwd->pw_name, grp->gr_mem[i]))) + matched = true; + } + } + + if (uid == muid || gid == mgid || uid == CGRULE_WILD) { + matched = true; + } + + if (!cache && !matched) + continue; + + /* + * Now, we're either caching rules or we found a match. Either + * way, copy everything into a new rule and push it into the + * list. + */ + newrule = calloc(1, sizeof(struct cgroup_rule)); + if (!newrule) { + cgroup_dbg("Out of memory? Error: %s\n", + strerror(errno)); + last_errno = errno; + ret = ECGOTHER; + goto cleanup; + } + + newrule->uid = uid; + newrule->gid = gid; + strncpy(newrule->name, user, strlen(user)); + strncpy(newrule->destination, destination, strlen(destination)); + newrule->next = NULL; + + /* Parse the controller list, and add that to newrule too. */ + stok_buff = strtok(controllers, ","); + if (!stok_buff) { + cgroup_dbg("Failed to parse controllers on line" + " %d\n", linenum); + goto destroyrule; + } + + i = 0; + do { + if (i >= MAX_MNT_ELEMENTS) { + cgroup_dbg("Too many controllers listed" + " on line %d\n", linenum); + goto destroyrule; + } + + newrule->controllers[i] = strndup(stok_buff, + strlen(stok_buff) + 1); + if (!(newrule->controllers[i])) { + cgroup_dbg("Out of memory? Error was: %s\n", + strerror(errno)); + goto destroyrule; + } + i++; + } while ((stok_buff = strtok(NULL, ","))); + + /* Now, push the rule. */ + if (lst->head == NULL) { + lst->head = newrule; + lst->tail = newrule; + } else { + lst->tail->next = newrule; + lst->tail = newrule; + } + + cgroup_dbg("Added rule %s (UID: %d, GID: %d) -> %s for" + " controllers:", lst->tail->name, lst->tail->uid, + lst->tail->gid, lst->tail->destination); + for (i = 0; lst->tail->controllers[i]; i++) { + cgroup_dbg(" %s", lst->tail->controllers[i]); + } + cgroup_dbg("\n"); + + /* Finally, clear the buffer. */ + memset(buff, '\0', CGROUP_RULE_MAXLINE); + grp = NULL; + pwd = NULL; + } + + /* If we make it here, there were no errors. */ + cgroup_dbg("Parsing of configuration file complete.\n\n"); + ret = (matched && !cache) ? -1 : 0; + goto cleanup; + +destroyrule: + cgroup_free_rule(newrule); + +parsefail: + ret = ECGROUPPARSEFAIL; + +cleanup: + free(buff); + +close_unlock: + fclose(fp); + pthread_rwlock_unlock(&rl_lock); +finish: + return ret; +} + +/** + * cgroup_init(), initializes the MOUNT_POINT. + * + * This code is theoretically thread safe now. Its not really tested + * so it can blow up. If does for you, please let us know with your + * test case and we can really make it thread safe. + * + */ +int cgroup_init() +{ + FILE *proc_mount = NULL; + struct mntent *ent = NULL; + struct mntent *temp_ent = NULL; + int found_mnt = 0; + int ret = 0; + static char *controllers[CG_CONTROLLER_MAX]; + FILE *proc_cgroup = NULL; + char subsys_name[FILENAME_MAX]; + int hierarchy, num_cgroups, enabled; + int i=0; + char *mntopt = NULL; + int err; + char *buf = NULL; + char mntent_buffer[4 * FILENAME_MAX]; + char *strtok_buffer = NULL; + + pthread_rwlock_wrlock(&cg_mount_table_lock); + + proc_cgroup = fopen("/proc/cgroups", "r"); + + if (!proc_cgroup) { + last_errno = errno; + ret = ECGOTHER; + goto unlock_exit; + } + + /* + * The first line of the file has stuff we are not interested in. + * So just read it and discard the information. + * + * XX: fix the size for fgets + */ + buf = malloc(FILENAME_MAX); + if (!buf) { + last_errno = errno; + ret = ECGOTHER; + goto unlock_exit; + } + buf = fgets(buf, FILENAME_MAX, proc_cgroup); + if (!buf) { + last_errno = errno; + ret = ECGOTHER; + goto unlock_exit; + } + free(buf); + + while (!feof(proc_cgroup)) { + err = fscanf(proc_cgroup, "%s %d %d %d", subsys_name, + &hierarchy, &num_cgroups, &enabled); + if (err < 0) + break; + controllers[i] = strdup(subsys_name); + i++; + } + controllers[i] = NULL; + + proc_mount = fopen("/proc/mounts", "r"); + if (proc_mount == NULL) { + ret = ECGFAIL; + goto unlock_exit; + } + + temp_ent = (struct mntent *) malloc(sizeof(struct mntent)); + + if (!temp_ent) { + last_errno = errno; + ret = ECGOTHER; + goto unlock_exit; + } + + while ((ent = getmntent_r(proc_mount, temp_ent, + mntent_buffer, + sizeof(mntent_buffer))) != NULL) { + if (!strcmp(ent->mnt_type, "cgroup")) { + for (i = 0; controllers[i] != NULL; i++) { + mntopt = hasmntopt(ent, controllers[i]); + + if (!mntopt) + continue; + + mntopt = strtok_r(mntopt, ",", &strtok_buffer); + + if (strcmp(mntopt, controllers[i]) == 0) { + cgroup_dbg("matched %s:%s\n", mntopt, + controllers[i]); + strcpy(cg_mount_table[found_mnt].name, + controllers[i]); + strcpy(cg_mount_table[found_mnt].path, + ent->mnt_dir); + cgroup_dbg("Found cgroup option %s, " + " count %d\n", + ent->mnt_opts, found_mnt); + found_mnt++; + } + } + } + } + + free(temp_ent); + + if (!found_mnt) { + cg_mount_table[0].name[0] = '\0'; + ret = ECGROUPNOTMOUNTED; + goto unlock_exit; + } + + found_mnt++; + cg_mount_table[found_mnt].name[0] = '\0'; + + cgroup_initialized = 1; + +unlock_exit: + if (proc_cgroup) + fclose(proc_cgroup); + + if (proc_mount) + fclose(proc_mount); + + for (i = 0; controllers[i]; i++) { + free(controllers[i]); + controllers[i] = NULL; + } + + pthread_rwlock_unlock(&cg_mount_table_lock); + + return ret; +} + +static int cg_test_mounted_fs() +{ + FILE *proc_mount = NULL; + struct mntent *ent = NULL; + struct mntent *temp_ent = NULL; + char mntent_buff[4 * FILENAME_MAX]; + int ret = 1; + + proc_mount = fopen("/proc/mounts", "r"); + if (proc_mount == NULL) { + return 0; + } + + temp_ent = (struct mntent *) malloc(sizeof(struct mntent)); + if (!temp_ent) { + /* We just fail at the moment. */ + return 0; + } + + ent = getmntent_r(proc_mount, temp_ent, mntent_buff, + sizeof(mntent_buff)); + + if (!ent) + return 0; + + while (strcmp(ent->mnt_type, "cgroup") !=0) { + ent = getmntent_r(proc_mount, temp_ent, mntent_buff, + sizeof(mntent_buff)); + if (ent == NULL) { + ret = 0; + goto done; + } + } +done: + fclose(proc_mount); + free(temp_ent); + return ret; +} + +static inline pid_t cg_gettid() +{ + return syscall(__NR_gettid); +} + + +/* Call with cg_mount_table_lock taken */ +static char *cg_build_path_locked(char *name, char *path, char *type) +{ + int i; + for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) { + /* + * XX: Change to snprintf once you figure what n should be + */ + if (strcmp(cg_mount_table[i].name, type) == 0) { + sprintf(path, "%s/", cg_mount_table[i].path); + if (name) { + char *tmp; + tmp = strdup(path); + sprintf(path, "%s%s/", tmp, name); + free(tmp); + } + return path; + } + } + return NULL; +} + +char *cg_build_path(char *name, char *path, char *type) +{ + pthread_rwlock_rdlock(&cg_mount_table_lock); + path = cg_build_path_locked(name, path, type); + pthread_rwlock_unlock(&cg_mount_table_lock); + + return path; +} + +/** cgroup_attach_task_pid is used to assign tasks to a cgroup. + * struct cgroup *cgroup: The cgroup to assign the thread to. + * pid_t tid: The thread to be assigned to the cgroup. + * + * returns 0 on success. + * returns ECGROUPNOTOWNER if the caller does not have access to the cgroup. + * returns ECGROUPNOTALLOWED for other causes of failure. + */ +int cgroup_attach_task_pid(struct cgroup *cgroup, pid_t tid) +{ + char path[FILENAME_MAX]; + FILE *tasks = NULL; + int i, ret = 0; + + if (!cgroup_initialized) { + cgroup_dbg("libcgroup is not initialized\n"); + return ECGROUPNOTINITIALIZED; + } + if(!cgroup) + { + pthread_rwlock_rdlock(&cg_mount_table_lock); + for(i = 0; i < CG_CONTROLLER_MAX && + cg_mount_table[i].name[0]!='\0'; i++) { + if (!cg_build_path_locked(NULL, path, + cg_mount_table[i].name)) + continue; + strncat(path, "/tasks", sizeof(path) - strlen(path)); + + tasks = fopen(path, "w"); + if (!tasks) { + pthread_rwlock_unlock(&cg_mount_table_lock); + switch (errno) { + case EPERM: + return ECGROUPNOTOWNER; + case ENOENT: + return ECGROUPNOTEXIST; + default: + return ECGROUPNOTALLOWED; + } + } + ret = fprintf(tasks, "%d", tid); + if (ret < 0) { + cgroup_dbg("Error writing tid %d to %s:%s\n", + tid, path, strerror(errno)); + fclose(tasks); + last_errno = errno; + return ECGOTHER; + } + + ret = fflush(tasks); + if (ret) { + last_errno = errno; + cgroup_dbg("Error writing tid %d to %s:%s\n", + tid, path, strerror(errno)); + fclose(tasks); + return ECGOTHER; + } + fclose(tasks); + } + pthread_rwlock_unlock(&cg_mount_table_lock); + } else { + for (i = 0; i < cgroup->index; i++) { + if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) { + cgroup_dbg("subsystem %s is not mounted\n", + cgroup->controller[i]->name); + return ECGROUPSUBSYSNOTMOUNTED; + } + } + + for (i = 0; i < cgroup->index; i++) { + if (!cg_build_path(cgroup->name, path, + cgroup->controller[i]->name)) + continue; + + strncat(path, "/tasks", sizeof(path) - strlen(path)); + + tasks = fopen(path, "w"); + if (!tasks) { + cgroup_dbg("fopen failed for %s:%s", path, + strerror(errno)); + + switch (errno) { + case EPERM: + return ECGROUPNOTOWNER; + case ENOENT: + return ECGROUPNOTEXIST; + default: + return ECGROUPNOTALLOWED; + } + } + ret = fprintf(tasks, "%d", tid); + if (ret < 0) { + last_errno = errno; + cgroup_dbg("Error writing tid %d to %s:%s\n", + tid, path, strerror(errno)); + fclose(tasks); + return ECGOTHER; + } + ret = fflush(tasks); + if (ret) { + last_errno = errno; + cgroup_dbg("Error writing tid %d to %s:%s\n", + tid, path, strerror(errno)); + fclose(tasks); + return ECGOTHER; + } + fclose(tasks); + } + } + return 0; +} + +/** cgroup_attach_task is used to attach the current thread to a cgroup. + * struct cgroup *cgroup: The cgroup to assign the current thread to. + * + * See cg_attach_task_pid for return values. + */ +int cgroup_attach_task(struct cgroup *cgroup) +{ + pid_t tid = cg_gettid(); + int error; + + error = cgroup_attach_task_pid(cgroup, tid); + + return error; +} + +/** + * cg_mkdir_p, emulate the mkdir -p command (recursively creating paths) + * @path: path to create + */ +static int cg_mkdir_p(const char *path) +{ + char *real_path = NULL; + char *wd = NULL; + int i = 0, j = 0; + char pos; + char *str = NULL; + int ret = 0; + char cwd[FILENAME_MAX]; + char *buf = NULL; + + buf = getcwd(cwd, FILENAME_MAX); + + if (!buf) { + last_errno = errno; + return ECGOTHER; + } + + real_path = strdup(path); + if (!real_path) { + last_errno = errno; + return ECGOTHER; + } + + do { + while (real_path[j] != '\0' && real_path[j] != '/') + j++; + while (real_path[j] != '\0' && real_path[j] == '/') + j++; + if (i == j) + continue; + pos = real_path[j]; + real_path[j] = '\0'; /* Temporarily overwrite "/" */ + str = &real_path[i]; + ret = mkdir(str, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + wd = strdup(str); + if (!wd) { + last_errno = errno; + ret = ECGOTHER; + break; + } + real_path[j] = pos; + if (ret) { + switch (errno) { + case EEXIST: + ret = 0; /* Not fatal really */ + break; + case EPERM: + ret = ECGROUPNOTOWNER; + free(wd); + goto done; + default: + ret = ECGROUPNOTALLOWED; + free(wd); + goto done; + } + } + i = j; + ret = chdir(wd); + if (ret) { + cgroup_dbg("could not chdir to child directory (%s)\n", + wd); + break; + } + free(wd); + } while (real_path[i]); + + ret = chdir(buf); + if (ret) { + last_errno = errno; + ret = ECGOTHER; + cgroup_dbg("could not go back to old directory (%s)\n", cwd); + } + +done: + free(real_path); + return ret; +} + +/* + * create_control_group() + * This is the basic function used to create the control group. This function + * just makes the group. It does not set any permissions, or any control values. + * The argument path is the fully qualified path name to make it generic. + */ +static int cg_create_control_group(char *path) +{ + int error; + if (!cg_test_mounted_fs()) + return ECGROUPNOTMOUNTED; + error = cg_mkdir_p(path); + return error; +} + +/* + * set_control_value() + * This is the low level function for putting in a value in a control file. + * This function takes in the complete path and sets the value in val in that + * file. + */ +static int cg_set_control_value(char *path, char *val) +{ + FILE *control_file = NULL; + if (!cg_test_mounted_fs()) + return ECGROUPNOTMOUNTED; + + control_file = fopen(path, "r+"); + + if (!control_file) { + if (errno == EPERM) { + /* + * We need to set the correct error value, does the + * group exist but we don't have the subsystem + * mounted at that point, or is it that the group + * does not exist. So we check if the tasks file + * exist. Before that, we need to extract the path. + */ + int len = strlen(path); + + while (*(path+len) != '/') + len--; + *(path+len+1) = '\0'; + strncat(path, "tasks", sizeof(path) - strlen(path)); + control_file = fopen(path, "r"); + if (!control_file) { + if (errno == ENOENT) + return ECGROUPSUBSYSNOTMOUNTED; + } + fclose(control_file); + return ECGROUPNOTALLOWED; + } + return ECGROUPVALUENOTEXIST; + } + + fprintf(control_file, "%s", val); + fclose(control_file); + return 0; +} + +/** cgroup_modify_cgroup modifies the cgroup control files. + * struct cgroup *cgroup: The name will be the cgroup to be modified. + * The values will be the values to be modified, those not mentioned + * in the structure will not be modified. + * + * The uids cannot be modified yet. + * + * returns 0 on success. + * + */ + +int cgroup_modify_cgroup(struct cgroup *cgroup) +{ + char *path, base[FILENAME_MAX]; + int i; + int error; + int ret; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + if (!cgroup) + return ECGROUPNOTALLOWED; + + for (i = 0; i < cgroup->index; i++) { + if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) { + cgroup_dbg("subsystem %s is not mounted\n", + cgroup->controller[i]->name); + return ECGROUPSUBSYSNOTMOUNTED; + } + } + + for (i = 0; i < cgroup->index; i++) { + int j; + if (!cg_build_path(cgroup->name, base, + cgroup->controller[i]->name)) + continue; + for (j = 0; j < cgroup->controller[i]->index; j++) { + ret = asprintf(&path, "%s%s", base, + cgroup->controller[i]->values[j]->name); + if (ret < 0) { + last_errno = errno; + error = ECGOTHER; + goto err; + } + error = cg_set_control_value(path, + cgroup->controller[i]->values[j]->value); + free(path); + path = NULL; + if (error) + goto err; + } + } + if (path) + free(path); + return 0; +err: + if (path) + free(path); + return error; + +} + +/** + * @dst: Destination controller + * @src: Source controller from which values will be copied to dst + * + * Create a duplicate copy of values under the specified controller + */ +int cgroup_copy_controller_values(struct cgroup_controller *dst, + struct cgroup_controller *src) +{ + int i, ret = 0; + + if (!dst || !src) + return ECGFAIL; + + strncpy(dst->name, src->name, FILENAME_MAX); + for (i = 0; i < src->index; i++, dst->index++) { + struct control_value *src_val = src->values[i]; + struct control_value *dst_val; + + dst->values[i] = calloc(1, sizeof(struct control_value)); + if (!dst->values[i]) { + ret = ECGFAIL; + goto err; + } + + dst_val = dst->values[i]; + strncpy(dst_val->value, src_val->value, CG_VALUE_MAX); + strncpy(dst_val->name, src_val->name, FILENAME_MAX); + } +err: + return ret; +} + +/** + * @dst: Destination control group + * @src: Source from which values will be copied to dst + * + * Create a duplicate copy of src in dst. This will be useful for those who + * that intend to create new instances based on an existing control group + */ +int cgroup_copy_cgroup(struct cgroup *dst, struct cgroup *src) +{ + int ret = 0, i; + + if (!dst || !src) + return ECGROUPNOTEXIST; + + /* + * Should we just use the restrict keyword instead? + */ + if (dst == src) + return ECGFAIL; + + cgroup_free_controllers(dst); + + for (i = 0; i < src->index; i++, dst->index++) { + struct cgroup_controller *src_ctlr = src->controller[i]; + struct cgroup_controller *dst_ctlr; + + dst->controller[i] = calloc(1, sizeof(struct cgroup_controller)); + if (!dst->controller[i]) { + ret = ECGFAIL; + goto err; + } + + dst_ctlr = dst->controller[i]; + ret = cgroup_copy_controller_values(dst_ctlr, src_ctlr); + if (ret) + goto err; + } +err: + return ret; +} + +/** cgroup_create_cgroup creates a new control group. + * struct cgroup *cgroup: The control group to be created + * + * returns 0 on success. We recommend calling cg_delete_cgroup + * if this routine fails. That should do the cleanup operation. + */ +int cgroup_create_cgroup(struct cgroup *cgroup, int ignore_ownership) +{ + char *fts_path[2]; + char *base = NULL; + char *path = NULL; + int i, j, k; + int error = 0; + int retval = 0; + int ret; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + if (!cgroup) + return ECGROUPNOTALLOWED; + + for (i = 0; i < cgroup->index; i++) { + if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) + return ECGROUPSUBSYSNOTMOUNTED; + } + + fts_path[0] = (char *)malloc(FILENAME_MAX); + if (!fts_path[0]) { + last_errno = errno; + return ECGOTHER; + } + fts_path[1] = NULL; + path = fts_path[0]; + + /* + * XX: One important test to be done is to check, if you have multiple + * subsystems mounted at one point, all of them *have* be on the cgroup + * data structure. If not, we fail. + */ + for (k = 0; k < cgroup->index; k++) { + if (!cg_build_path(cgroup->name, path, + cgroup->controller[k]->name)) + continue; + + error = cg_create_control_group(path); + if (error) + goto err; + + base = strdup(path); + + if (!base) { + last_errno = errno; + error = ECGOTHER; + goto err; + } + + if (!ignore_ownership) { + cgroup_dbg("Changing ownership of %s\n", fts_path[0]); + error = cg_chown_recursive(fts_path, + cgroup->control_uid, cgroup->control_gid); + } + + if (error) + goto err; + + for (j = 0; j < cgroup->controller[k]->index; j++) { + ret = snprintf(path, FILENAME_MAX, "%s%s", base, + cgroup->controller[k]->values[j]->name); + cgroup_dbg("setting %s to %s, error %d\n", path, + cgroup->controller[k]->values[j]->name, ret); + if (ret < 0 || ret >= FILENAME_MAX) { + last_errno = errno; + error = ECGOTHER; + goto err; + } + error = cg_set_control_value(path, + cgroup->controller[k]->values[j]->value); + /* + * Should we undo, what we've done in the loops above? + * An error should not be treated as fatal, since we + * have several read-only files and several files that + * are only conditionally created in the child. + * + * A middle ground would be to track that there + * was an error and return that value. + */ + if (error) { + retval = error; + continue; + } + } + + if (!ignore_ownership) { + ret = snprintf(path, FILENAME_MAX, "%s/tasks", base); + if (ret < 0 || ret >= FILENAME_MAX) { + last_errno = errno; + error = ECGOTHER; + goto err; + } + error = chown(path, cgroup->tasks_uid, + cgroup->tasks_gid); + if (error) { + last_errno = errno; + error = ECGOTHER; + goto err; + } + } + free(base); + base = NULL; + } + +err: + if (path) + free(path); + if (base) + free(base); + if (retval && !error) + error = retval; + return error; +} + +/** + * Find the parent of the specified directory. It returns the parent (the + * parent is usually name/.. unless name is a mount point. + */ +char *cgroup_find_parent(char *name) +{ + char child[FILENAME_MAX]; + char *parent = NULL; + struct stat stat_child, stat_parent; + char *type = NULL; + char *dir = NULL; + + pthread_rwlock_rdlock(&cg_mount_table_lock); + type = cg_mount_table[0].name; + if (!cg_build_path_locked(name, child, type)) { + pthread_rwlock_unlock(&cg_mount_table_lock); + return NULL; + } + pthread_rwlock_unlock(&cg_mount_table_lock); + + cgroup_dbg("path is %s\n", child); + dir = dirname(child); + cgroup_dbg("directory name is %s\n", dir); + + if (asprintf(&parent, "%s/..", dir) < 0) + return NULL; + + cgroup_dbg("parent's name is %s\n", parent); + + if (stat(dir, &stat_child) < 0) + goto free_parent; + + if (stat(parent, &stat_parent) < 0) + goto free_parent; + + /* + * Is the specified "name" a mount point? + */ + if (stat_parent.st_dev != stat_child.st_dev) { + cgroup_dbg("parent is a mount point\n"); + strcpy(parent, "."); + } else { + dir = strdup(name); + if (!dir) + goto free_parent; + dir = dirname(dir); + if (strcmp(dir, ".") == 0) + strcpy(parent, ".."); + else + strcpy(parent, dir); + } + + return parent; + +free_parent: + free(parent); + return NULL; +} + +/** + * @cgroup: cgroup data structure to be filled with parent values and then + * passed down for creation + * @ignore_ownership: Ignore doing a chown on the newly created cgroup + */ +int cgroup_create_cgroup_from_parent(struct cgroup *cgroup, + int ignore_ownership) +{ + char *parent = NULL; + struct cgroup *parent_cgroup = NULL; + int ret = ECGFAIL; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + parent = cgroup_find_parent(cgroup->name); + if (!parent) + return ret; + + cgroup_dbg("parent is %s\n", parent); + parent_cgroup = cgroup_new_cgroup(parent); + if (!parent_cgroup) + goto err_nomem; + + if (cgroup_get_cgroup(parent_cgroup)) + goto err_parent; + + cgroup_dbg("got parent group for %s\n", parent_cgroup->name); + ret = cgroup_copy_cgroup(cgroup, parent_cgroup); + if (ret) + goto err_parent; + + cgroup_dbg("copied parent group %s to %s\n", parent_cgroup->name, + cgroup->name); + ret = cgroup_create_cgroup(cgroup, ignore_ownership); + +err_parent: + cgroup_free(&parent_cgroup); +err_nomem: + free(parent); + return ret; +} + +/** cgroup_delete cgroup deletes a control group. + * struct cgroup *cgroup takes the group which is to be deleted. + * + * returns 0 on success. + */ +int cgroup_delete_cgroup(struct cgroup *cgroup, int ignore_migration) +{ + FILE *delete_tasks = NULL, *base_tasks = NULL; + int tids; + char path[FILENAME_MAX]; + int error = ECGROUPNOTALLOWED; + int i, ret; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + if (!cgroup) + return ECGROUPNOTALLOWED; + + for (i = 0; i < cgroup->index; i++) { + if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) + return ECGROUPSUBSYSNOTMOUNTED; + } + + for (i = 0; i < cgroup->index; i++) { + if (!cg_build_path(cgroup->name, path, + cgroup->controller[i]->name)) + continue; + strncat(path, "../tasks", sizeof(path) - strlen(path)); + + base_tasks = fopen(path, "w"); + if (!base_tasks) + goto open_err; + + if (!cg_build_path(cgroup->name, path, + cgroup->controller[i]->name)) { + fclose(base_tasks); + continue; + } + + strncat(path, "tasks", sizeof(path) - strlen(path)); + + delete_tasks = fopen(path, "r"); + if (!delete_tasks) { + fclose(base_tasks); + goto open_err; + } + + while (!feof(delete_tasks)) { + ret = fscanf(delete_tasks, "%d", &tids); + if (ret == EOF || ret < 1) + break; + fprintf(base_tasks, "%d", tids); + } + + fclose(delete_tasks); + fclose(base_tasks); + + if (!cg_build_path(cgroup->name, path, + cgroup->controller[i]->name)) + continue; + error = rmdir(path); + last_errno = errno; + } +open_err: + if (ignore_migration) { + for (i = 0; i < cgroup->index; i++) { + if (!cg_build_path(cgroup->name, path, + cgroup->controller[i]->name)) + continue; + error = rmdir(path); + if (error < 0 && errno == ENOENT) { + last_errno = errno; + error = 0; + } + } + } + if (error) + return ECGOTHER; + + return error; +} + +/* + * This function should really have more checks, but this version + * will assume that the callers have taken care of everything. + * Including the locking. + */ +static int cg_rd_ctrl_file(char *subsys, char *cgroup, char *file, char **value) +{ + char path[FILENAME_MAX]; + FILE *ctrl_file = NULL; + int ret; + + if (!cg_build_path_locked(cgroup, path, subsys)) + return ECGFAIL; + + strncat(path, file, sizeof(path) - strlen(path)); + ctrl_file = fopen(path, "r"); + if (!ctrl_file) + return ECGROUPVALUENOTEXIST; + + *value = malloc(CG_VALUE_MAX); + if (!*value) { + last_errno = errno; + return ECGOTHER; + } + + /* + * using %as crashes when we try to read from files like + * memory.stat + */ + ret = fscanf(ctrl_file, "%s", *value); + if (ret == 0 || ret == EOF) { + free(*value); + *value = NULL; + } + + fclose(ctrl_file); + + return 0; +} + +/* + * Call this function with required locks taken. + */ +static int cgroup_fill_cgc(struct dirent *ctrl_dir, struct cgroup *cgroup, + struct cgroup_controller *cgc, int index) +{ + char *ctrl_name = NULL; + char *ctrl_file = NULL; + char *ctrl_value = NULL; + char *d_name = NULL; + char path[FILENAME_MAX+1]; + char *buffer = NULL; + int error = 0; + struct stat stat_buffer; + + d_name = strdup(ctrl_dir->d_name); + + if (!strcmp(d_name, ".") || !strcmp(d_name, "..")) { + error = ECGINVAL; + goto fill_error; + } + + + /* + * This part really needs to be optimized out. Probably use + * some sort of a flag, but this is fine for now. + */ + + cg_build_path_locked(cgroup->name, path, cg_mount_table[index].name); + strncat(path, d_name, sizeof(path) - strlen(path)); + + error = stat(path, &stat_buffer); + + if (error) { + error = ECGFAIL; + goto fill_error; + } + + cgroup->control_uid = stat_buffer.st_uid; + cgroup->control_gid = stat_buffer.st_gid; + + ctrl_name = strtok_r(d_name, ".", &buffer); + + if (!ctrl_name) { + error = ECGFAIL; + goto fill_error; + } + + ctrl_file = strtok_r(NULL, ".", &buffer); + + if (!ctrl_file) { + error = ECGINVAL; + goto fill_error; + } + + if (strcmp(ctrl_name, cg_mount_table[index].name) == 0) { + error = cg_rd_ctrl_file(cg_mount_table[index].name, + cgroup->name, ctrl_dir->d_name, &ctrl_value); + if (error || !ctrl_value) + goto fill_error; + + if (cgroup_add_value_string(cgc, ctrl_dir->d_name, + ctrl_value)) { + error = ECGFAIL; + goto fill_error; + } + } +fill_error: + if (ctrl_value) + free(ctrl_value); + free(d_name); + return error; +} + +/* + * cgroup_get_cgroup reads the cgroup data from the filesystem. + * struct cgroup has the name of the group to be populated + * + * return 0 on success. + */ +int cgroup_get_cgroup(struct cgroup *cgroup) +{ + int i; + char path[FILENAME_MAX]; + DIR *dir = NULL; + struct dirent *ctrl_dir = NULL; + char *control_path = NULL; + int error; + int ret; + + if (!cgroup_initialized) { + /* ECGROUPNOTINITIALIZED */ + return ECGROUPNOTINITIALIZED; + } + + if (!cgroup) { + /* ECGROUPNOTALLOWED */ + return ECGROUPNOTALLOWED; + } + + pthread_rwlock_rdlock(&cg_mount_table_lock); + for (i = 0; i < CG_CONTROLLER_MAX && + cg_mount_table[i].name[0] != '\0'; i++) { + /* + * cgc will not leak, since it has to be freed using + * cgroup_free_cgroup + */ + struct cgroup_controller *cgc; + struct stat stat_buffer; + int path_len; + + if (!cg_build_path_locked(NULL, path, + cg_mount_table[i].name)) + continue; + + path_len = strlen(path); + strncat(path, cgroup->name, FILENAME_MAX - path_len - 1); + + if (access(path, F_OK)) + continue; + + if (!cg_build_path_locked(cgroup->name, path, + cg_mount_table[i].name)) { + /* + * This fails when the cgroup does not exist + * for that controller. + */ + continue; + } + + /* + * Get the uid and gid information + */ + + ret = asprintf(&control_path, "%s/tasks", path); + + if (ret < 0) { + last_errno = errno; + error = ECGOTHER; + goto unlock_error; + } + + if (stat(control_path, &stat_buffer)) { + last_errno = errno; + free(control_path); + error = ECGOTHER; + goto unlock_error; + } + + cgroup->tasks_uid = stat_buffer.st_uid; + cgroup->tasks_gid = stat_buffer.st_gid; + + free(control_path); + + cgc = cgroup_add_controller(cgroup, + cg_mount_table[i].name); + if (!cgc) { + error = ECGINVAL; + goto unlock_error; + } + + dir = opendir(path); + if (!dir) { + last_errno = errno; + error = ECGOTHER; + goto unlock_error; + } + + while ((ctrl_dir = readdir(dir)) != NULL) { + /* + * Skip over non regular files + */ + if (ctrl_dir->d_type != DT_REG) + continue; + + error = cgroup_fill_cgc(ctrl_dir, cgroup, cgc, i); + if (error == ECGFAIL) { + closedir(dir); + goto unlock_error; + } + } + closedir(dir); + } + + /* Check if the group really exists or not */ + if (!cgroup->index) { + error = ECGROUPNOTEXIST; + goto unlock_error; + } + + pthread_rwlock_unlock(&cg_mount_table_lock); + return 0; + +unlock_error: + pthread_rwlock_unlock(&cg_mount_table_lock); + /* + * XX: Need to figure out how to cleanup? Cleanup just the stuff + * we added, or the whole structure. + */ + cgroup = NULL; + return error; +} + +/** cg_prepare_cgroup + * Process the selected rule. Prepare the cgroup structure which can be + * used to add the task to destination cgroup. + * + * + * returns 0 on success. + */ +static int cg_prepare_cgroup(struct cgroup *cgroup, pid_t pid, + const char *dest, + char *controllers[]) +{ + int ret = 0, i; + char *controller = NULL; + struct cgroup_controller *cptr = NULL; + + /* Fill in cgroup details. */ + cgroup_dbg("Will move pid %d to cgroup '%s'\n", pid, dest); + + strcpy(cgroup->name, dest); + + /* Scan all the controllers */ + for (i = 0; i < CG_CONTROLLER_MAX; i++) { + if (!controllers[i]) + return 0; + controller = controllers[i]; + + /* If first string is "*" that means all the mounted + * controllers. */ + if (strcmp(controller, "*") == 0) { + pthread_rwlock_rdlock(&cg_mount_table_lock); + for (i = 0; i < CG_CONTROLLER_MAX && + cg_mount_table[i].name[0] != '\0'; i++) { + cgroup_dbg("Adding controller %s\n", + cg_mount_table[i].name); + cptr = cgroup_add_controller(cgroup, + cg_mount_table[i].name); + if (!cptr) { + cgroup_dbg("Adding controller '%s'" + " failed\n", + cg_mount_table[i].name); + pthread_rwlock_unlock(&cg_mount_table_lock); + return ECGROUPNOTALLOWED; + } + } + pthread_rwlock_unlock(&cg_mount_table_lock); + return ret; + } + + /* it is individual controller names and not "*" */ + cgroup_dbg("Adding controller %s\n", controller); + cptr = cgroup_add_controller(cgroup, controller); + if (!cptr) { + cgroup_dbg("Adding controller '%s' failed\n", + controller); + return ECGROUPNOTALLOWED; + } + } + + return ret; +} + +/** + * Finds the first rule in the cached list that matches the given UID or GID, + * and returns a pointer to that rule. This function uses rl_lock. + * + * This function may NOT be thread safe. + * @param uid The UID to match + * @param gid The GID to match + * @return Pointer to the first matching rule, or NULL if no match + * TODO: Determine thread-safeness and fix if not safe. + */ +static struct cgroup_rule *cgroup_find_matching_rule_uid_gid(const uid_t uid, + const gid_t gid) +{ + /* Return value */ + struct cgroup_rule *ret = rl.head; + + /* Temporary user data */ + struct passwd *usr = NULL; + + /* Temporary group data */ + struct group *grp = NULL; + + /* Temporary string pointer */ + char *sp = NULL; + + /* Loop variable */ + int i = 0; + + pthread_rwlock_wrlock(&rl_lock); + while (ret) { + /* The wildcard rule always matches. */ + if ((ret->uid == CGRULE_WILD) && (ret->gid == CGRULE_WILD)) { + goto finished; + } + + /* This is the simple case of the UID matching. */ + if (ret->uid == uid) { + goto finished; + } + + /* This is the simple case of the GID matching. */ + if (ret->gid == gid) { + goto finished; + } + + /* If this is a group rule, the UID might be a member. */ + if (ret->name[0] == '@') { + /* Get the group data. */ + sp = &(ret->name[1]); + grp = getgrnam(sp); + if (!grp) { + continue; + } + + /* Get the data for UID. */ + usr = getpwuid(uid); + if (!usr) { + continue; + } + + /* If UID is a member of group, we matched. */ + for (i = 0; grp->gr_mem[i]; i++) { + if (!(strcmp(usr->pw_name, grp->gr_mem[i]))) + goto finished; + } + } + + /* If we haven't matched, try the next rule. */ + ret = ret->next; + } + + /* If we get here, no rules matched. */ + ret = NULL; + +finished: + pthread_rwlock_unlock(&rl_lock); + return ret; +} + +/** + * Changes the cgroup of a program based on the rules in the config file. If a + * rule exists for the given UID or GID, then the given PID is placed into the + * correct group. By default, this function parses the configuration file each + * time it is called. + * + * The flags can alter the behavior of this function: + * CGFLAG_USECACHE: Use cached rules instead of parsing the config file + * + * This function may NOT be thread safe. + * @param uid The UID to match + * @param gid The GID to match + * @param pid The PID of the process to move + * @param flags Bit flags to change the behavior, as defined above + * @return 0 on success, > 0 on error + * TODO: Determine thread-safeness and fix of not safe. + */ +int cgroup_change_cgroup_uid_gid_flags(const uid_t uid, const gid_t gid, + const pid_t pid, const int flags) +{ + /* Temporary pointer to a rule */ + struct cgroup_rule *tmp = NULL; + + /* Return codes */ + int ret = 0; + + /* We need to check this before doing anything else! */ + if (!cgroup_initialized) { + cgroup_dbg("libcgroup is not initialized\n"); + ret = ECGROUPNOTINITIALIZED; + goto finished; + } + + /* + * If the user did not ask for cached rules, we must parse the + * configuration to find a matching rule (if one exists). Else, we'll + * find the first match in the cached list (rl). + */ + if (!(flags & CGFLAG_USECACHE)) { + cgroup_dbg("Not using cached rules for PID %d.\n", pid); + ret = cgroup_parse_rules(false, uid, gid); + + /* The configuration file has an error! We must exit now. */ + if (ret != -1 && ret != 0) { + cgroup_dbg("Failed to parse the configuration" + " rules.\n"); + goto finished; + } + + /* We did not find a matching rule, so we're done. */ + if (ret == 0) { + cgroup_dbg("No rule found to match PID: %d, UID: %d, " + "GID: %d\n", pid, uid, gid); + goto finished; + } + + /* Otherwise, we did match a rule and it's in trl. */ + tmp = trl.head; + } else { + /* Find the first matching rule in the cached list. */ + tmp = cgroup_find_matching_rule_uid_gid(uid, gid); + if (!tmp) { + cgroup_dbg("No rule found to match PID: %d, UID: %d, " + "GID: %d\n", pid, uid, gid); + ret = 0; + goto finished; + } + } + cgroup_dbg("Found matching rule %s for PID: %d, UID: %d, GID: %d\n", + tmp->name, pid, uid, gid); + + /* If we are here, then we found a matching rule, so execute it. */ + do { + cgroup_dbg("Executing rule %s for PID %d... ", tmp->name, pid); + ret = cgroup_change_cgroup_path(tmp->destination, + pid, tmp->controllers); + if (ret) { + cgroup_dbg("FAILED! (Error Code: %d)\n", ret); + goto finished; + } + cgroup_dbg("OK!\n"); + + /* Now, check for multi-line rules. As long as the "next" + * rule starts with '%', it's actually part of the rule that + * we just executed. + */ + tmp = tmp->next; + } while (tmp && (tmp->name[0] == '%')); + +finished: + return ret; +} + +/** + * Provides backwards-compatibility with older versions of the API. This + * function is deprecated, and cgroup_change_cgroup_uid_gid_flags() should be + * used instead. In fact, this function simply calls the newer one with flags + * set to 0 (none). + * @param uid The UID to match + * @param gid The GID to match + * @param pid The PID of the process to move + * @return 0 on success, > 0 on error + * + */ +int cgroup_change_cgroup_uid_gid(uid_t uid, gid_t gid, pid_t pid) +{ + return cgroup_change_cgroup_uid_gid_flags(uid, gid, pid, 0); +} + +/** + * Changes the cgroup of a program based on the path provided. In this case, + * the user must already know into which cgroup the task should be placed and + * no rules will be parsed. + * + * returns 0 on success. + */ +int cgroup_change_cgroup_path(char *dest, pid_t pid, char *controllers[]) +{ + int ret; + struct cgroup cgroup; + + if (!cgroup_initialized) { + cgroup_dbg("libcgroup is not initialized\n"); + return ECGROUPNOTINITIALIZED; + } + memset(&cgroup, 0, sizeof(struct cgroup)); + + ret = cg_prepare_cgroup(&cgroup, pid, dest, controllers); + if (ret) + return ret; + /* Add task to cgroup */ + ret = cgroup_attach_task_pid(&cgroup, pid); + if (ret) { + cgroup_dbg("cgroup_attach_task_pid failed:%d\n", ret); + return ret; + } + return 0; +} + +/** + * Print the cached rules table. This function should be called only after + * first calling cgroup_parse_config(), but it will work with an empty rule + * list. + * @param fp The file stream to print to + */ +void cgroup_print_rules_config(FILE *fp) +{ + /* Iterator */ + struct cgroup_rule *itr = NULL; + + /* Loop variable */ + int i = 0; + + pthread_rwlock_rdlock(&rl_lock); + + if (!(rl.head)) { + fprintf(fp, "The rules table is empty.\n\n"); + pthread_rwlock_unlock(&rl_lock); + return; + } + + itr = rl.head; + while (itr) { + fprintf(fp, "Rule: %s\n", itr->name); + + if (itr->uid == CGRULE_WILD) + fprintf(fp, " UID: any\n"); + else if (itr->uid == CGRULE_INVALID) + fprintf(fp, " UID: N/A\n"); + else + fprintf(fp, " UID: %d\n", itr->uid); + + if (itr->gid == CGRULE_WILD) + fprintf(fp, " GID: any\n"); + else if (itr->gid == CGRULE_INVALID) + fprintf(fp, " GID: N/A\n"); + else + fprintf(fp, " GID: %d\n", itr->gid); + + fprintf(fp, " DEST: %s\n", itr->destination); + + fprintf(fp, " CONTROLLERS:\n"); + for (i = 0; i < MAX_MNT_ELEMENTS; i++) { + if (itr->controllers[i]) { + fprintf(fp, " %s\n", itr->controllers[i]); + } + } + fprintf(fp, "\n"); + itr = itr->next; + } + pthread_rwlock_unlock(&rl_lock); +} + +/** + * Reloads the rules list, using the given configuration file. This function + * is probably NOT thread safe (calls cgroup_parse_rules()). + * @return 0 on success, > 0 on failure + */ +int cgroup_reload_cached_rules() +{ + /* Return codes */ + int ret = 0; + + cgroup_dbg("Reloading cached rules from %s.\n", CGRULES_CONF_FILE); + if ((ret = cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID))) { + cgroup_dbg("Error parsing configuration file \"%s\": %d.\n", + CGRULES_CONF_FILE, ret); + ret = ECGROUPPARSEFAIL; + goto finished; + } + + #ifdef CGROUP_DEBUG + cgroup_print_rules_config(stdout); + #endif + +finished: + return ret; +} + +/** + * Initializes the rules cache. + * @return 0 on success, > 0 on error + */ +int cgroup_init_rules_cache() +{ + /* Return codes */ + int ret = 0; + + /* Attempt to read the configuration file and cache the rules. */ + ret = cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID); + if (ret) { + cgroup_dbg("Could not initialize rule cache, error was: %d\n", + ret); + cgroup_rules_loaded = false; + } else { + cgroup_rules_loaded = true; + } + + return ret; +} + +/** + * cgroup_get_current_controller_path + * @pid: pid of the current process for which the path is to be determined + * @controller: name of the controller for which to determine current path + * @current_path: a pointer that is filled with the value of the current + * path as seen in /proc/<pid>/cgroup + */ +int cgroup_get_current_controller_path(pid_t pid, const char *controller, + char **current_path) +{ + char *path = NULL; + int ret; + FILE *pid_cgroup_fd = NULL; + + if (!controller) + return ECGOTHER; + + if (!cgroup_initialized) { + cgroup_dbg("libcgroup is not initialized\n"); + return ECGROUPNOTINITIALIZED; + } + + ret = asprintf(&path, "/proc/%d/cgroup", pid); + if (ret <= 0) { + cgroup_dbg("cannot allocate memory (/proc/pid/cgroup) ret %d\n", + ret); + return ret; + } + + ret = ECGROUPNOTEXIST; + pid_cgroup_fd = fopen(path, "r"); + if (!pid_cgroup_fd) + goto cleanup_path; + + /* + * Why do we grab the cg_mount_table_lock?, the reason is that + * the cgroup of a pid can change via the cgroup_attach_task_pid() + * call. To make sure, we return consitent and safe results, + * we acquire the lock upfront. We can optimize by acquiring + * and releasing the lock in the while loop, but that + * will be more expensive. + */ + pthread_rwlock_rdlock(&cg_mount_table_lock); + while (!feof(pid_cgroup_fd)) { + char controllers[FILENAME_MAX]; + char cgroup_path[FILENAME_MAX]; + int num; + char *savedptr; + char *token; + + ret = fscanf(pid_cgroup_fd, "%d:%[^:]:%s\n", &num, controllers, + cgroup_path); + /* + * Magic numbers like "3" seem to be integrating into + * my daily life, I need some magic to help make them + * disappear :) + */ + if (ret != 3 || ret == EOF) { + cgroup_dbg("read failed for pid_cgroup_fd ret %d\n", + ret); + last_errno = errno; + ret = ECGOTHER; + goto done; + } + + token = strtok_r(controllers, ",", &savedptr); + do { + if (strncmp(controller, token, strlen(controller) + 1) + == 0) { + *current_path = strdup(cgroup_path); + if (!*current_path) { + last_errno = errno; + ret = ECGOTHER; + goto done; + } + ret = 0; + goto done; + } + token = strtok_r(NULL, ",", &savedptr); + } while (token); + } + +done: + pthread_rwlock_unlock(&cg_mount_table_lock); + fclose(pid_cgroup_fd); +cleanup_path: + free(path); + return ret; +} + +char *cgroup_strerror(int code) +{ + assert((code >= ECGROUPNOTCOMPILED) && (code < ECGSENTINEL)); + if (code == ECGOTHER) { + snprintf(errtext, MAXLEN, "%s: error message: %s", + cgroup_strerror_codes[code % ECGROUPNOTCOMPILED], + strerror(cgroup_get_last_errno())); + return errtext; + } + return cgroup_strerror_codes[code % ECGROUPNOTCOMPILED]; +} + +/** + * Return last errno, which caused ECGOTHER error. + */ +int cgroup_get_last_errno() +{ + return last_errno; +} + + +static int cg_walk_node(FTS *fts, FTSENT *ent, const int depth, + struct cgroup_file_info *info) +{ + int ret = 0; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + cgroup_dbg("seeing file %s\n", ent->fts_path); + + info->path = ent->fts_name; + info->parent = ent->fts_parent->fts_name; + info->full_path = ent->fts_path; + info->depth = ent->fts_level; + info->type = CGROUP_FILE_TYPE_OTHER; + + if (depth && (info->depth > depth)) + return 0; + + switch (ent->fts_info) { + case FTS_DNR: + case FTS_ERR: + errno = ent->fts_errno; + break; + case FTS_D: + info->type = CGROUP_FILE_TYPE_DIR; + break; + case FTS_DC: + case FTS_NSOK: + case FTS_NS: + case FTS_DP: + break; + case FTS_F: + info->type = CGROUP_FILE_TYPE_FILE; + break; + case FTS_DEFAULT: + break; + } + return ret; +} + +int cgroup_walk_tree_next(const int depth, void **handle, + struct cgroup_file_info *info, int base_level) +{ + int ret = 0; + FTS *fts = *(FTS **)handle; + FTSENT *ent; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + if (!handle) + return ECGINVAL; + ent = fts_read(fts); + if (!ent) + return ECGEOF; + if (!base_level && depth) + base_level = ent->fts_level + depth; + ret = cg_walk_node(fts, ent, base_level, info); + *handle = fts; + return ret; +} + +int cgroup_walk_tree_end(void **handle) +{ + FTS *fts = *(FTS **)handle; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + if (!handle) + return ECGINVAL; + fts_close(fts); + return 0; +} + +/* + * TODO: Need to decide a better place to put this function. + */ +int cgroup_walk_tree_begin(char *controller, char *base_path, const int depth, + void **handle, struct cgroup_file_info *info, + int *base_level) +{ + int ret = 0; + cgroup_dbg("path is %s\n", base_path); + char *cg_path[2]; + char full_path[FILENAME_MAX]; + FTSENT *ent; + FTS *fts; + + if (!cgroup_initialized) + return ECGROUPNOTINITIALIZED; + + if (!cg_build_path(base_path, full_path, controller)) + return ECGOTHER; + + *base_level = 0; + cg_path[0] = full_path; + cg_path[1] = NULL; + + fts = fts_open(cg_path, FTS_LOGICAL | FTS_NOCHDIR | + FTS_NOSTAT, NULL); + ent = fts_read(fts); + if (!ent) { + cgroup_dbg("fts_read failed\n"); + return ECGINVAL; + } + if (!*base_level && depth) + *base_level = ent->fts_level + depth; + ret = cg_walk_node(fts, ent, *base_level, info); + *handle = fts; + return ret; +} |