diff options
Diffstat (limited to 'api.c')
-rw-r--r-- | api.c | 337 |
1 files changed, 216 insertions, 121 deletions
@@ -2,6 +2,7 @@ * Copyright IBM Corporation. 2007 * * Author: Dhaval Giani <dhaval@linux.vnet.ibm.com> + * Author: Balbir Singh <balbir@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2.1 of the GNU Lesser General Public License @@ -31,24 +32,113 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <unistd.h> +#include <fts.h> +/* + * Remember to bump this up for major API changes. + */ +const static char cg_version[] = "0.01"; + +/* + * Only one mount point is currently supported. This will be enhanced to + * support several hierarchies in the future + */ static char MOUNT_POINT[FILENAME_MAX]; +static int cg_chown_file(FTS *fts, FTSENT *ent, uid_t owner, gid_t group) +{ + int ret = 0; + const char *filename = fts->fts_path; + dbg("seeing file %s\n", filename); + switch (ent->fts_info) { + case FTS_ERR: + errno = ent->fts_errno; + break; + case FTS_D: + case FTS_DC: + case FTS_NSOK: + case FTS_NS: + case FTS_DNR: + case FTS_DP: + case FTS_F: + case FTS_DEFAULT: + ret = chown(filename, owner, group); + break; + } + return ret; +} + +/* + * TODO: Need to decide a better place to put this function. + */ +static int cg_chown_recursive(char **path, uid_t owner, gid_t group) +{ + int ret = 0; + dbg("path is %s\n", *path); + FTS *fts = fts_open(path, FTS_PHYSICAL | FTS_NOCHDIR | + FTS_NOSTAT, NULL); + while (1) { + FTSENT *ent; + ent = fts_read(fts); + if (!ent) { + dbg("fts_read failed\n"); + break; + } + ret = cg_chown_file(fts, ent, owner, group); + } + fts_close(fts); + return ret; +} + +/** + * cg_init(), initializes the MOUNT_POINT. + * This code is not currently thread safe (hint: getmntent is not thread safe). + * This API is likely to change in the future to push state back to the caller + * to achieve thread safety. The code currently supports just one mount point. + * Complain if the cgroup filesystem controllers are bound to different mount + * points. + */ int cg_init() { FILE *proc_mount; - struct mntent *ent; + struct mntent *ent, *found_ent = NULL; + int found_mnt = 0; + int ret = 0; proc_mount = fopen("/proc/mounts", "r"); - ent = getmntent(proc_mount); + if (proc_mount == NULL) { + return EIO; + } - while (strcmp(ent->mnt_fsname,"cgroup") != 0) { - ent = getmntent(proc_mount); - if (ent == NULL) - return ECGROUPNOTMOUNTED; + while ((ent = getmntent(proc_mount)) != NULL) { + if (!strncmp(ent->mnt_fsname,"cgroup", strlen("cgroup"))) { + found_ent = ent; + found_mnt++; + dbg("Found cgroup option %s, count %d\n", + found_ent->mnt_opts, found_mnt); + } } - strcpy(MOUNT_POINT, ent->mnt_dir); - return 0; + + /* + * Currently we require that all controllers be bound together + */ + if (!found_mnt) + ret = ECGROUPNOTMOUNTED; + else if (found_mnt > 1) + ret = ECGROUPMULTIMOUNTED; + else { + /* + * NOTE: FILENAME_MAX ensures that we don't need to worry + * about crossing MOUNT_POINT size. For the paranoid, yes + * this is a potential security hole - Balbir + * Dhaval - fix these things. + */ + strcpy(MOUNT_POINT, found_ent->mnt_dir); + strcat(MOUNT_POINT, "/"); + } + + fclose(proc_mount); + return ret; } static int cg_test_mounted_fs() @@ -67,6 +157,7 @@ static int cg_test_mounted_fs() if (ent == NULL) return 0; } + fclose(proc_mount); return 1; } @@ -75,21 +166,33 @@ static inline pid_t cg_gettid() return syscall(__NR_gettid); } -/* +static char* cg_build_path(char *name, char *path) +{ + strcpy(path, MOUNT_POINT); + strcat(path, name); + strcat(path, "/"); + return path; +} + +/** cg_attach_task_pid is used to assign tasks to a cgroup. + * struct cgroup *cgroup: The cgroup to assign the thread to. + * pid_t tid: The thread to be assigned to the cgroup. + * + * returns 0 on success. + * returns ECGROUPNOTOWNER if the caller does not have access to the cgroup. + * returns ECGROUPNOTALLOWED for other causes of failure. */ -int cg_attach_task_pid(char *cgroup, pid_t tid) +int cg_attach_task_pid(struct cgroup *cgroup, pid_t tid) { char path[FILENAME_MAX]; FILE *tasks; - if (cgroup == NULL) { - cgroup = (char *) malloc(sizeof(char)); - cgroup = "\0"; + if (cgroup == NULL) + strcpy(path, MOUNT_POINT); + else { + cg_build_path(cgroup->name, path); } - strcpy(path, MOUNT_POINT); - strcat(path, "/"); - strcat(path, cgroup); strcat(path, "/tasks"); tasks = fopen(path, "w"); @@ -102,17 +205,18 @@ int cg_attach_task_pid(char *cgroup, pid_t tid) } } fprintf(tasks, "%d", tid); + fclose(tasks); return 0; } -/* - * Used to attach the task to a control group. +/** cg_attach_task is used to attach the current thread to a cgroup. + * struct cgroup *cgroup: The cgroup to assign the current thread to. * - * WARNING: Will change to use struct cgroup when it is implemented. + * See cg_attach_task_pid for return values. */ -int cg_attach_task(char *cgroup) +int cg_attach_task(struct cgroup *cgroup) { pid_t tid = cg_gettid(); int error; @@ -133,8 +237,8 @@ static int cg_create_control_group(char *path) int error; if (!cg_test_mounted_fs()) return ECGROUPNOTMOUNTED; - error = mkdir(path, 0700); - if (!error) { + error = mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + if (error) { switch(errno) { case EPERM: return ECGROUPNOTOWNER; @@ -184,54 +288,48 @@ static int cg_set_control_value(char *path, char *val) if (errno == ENOENT) return ECGROUPSUBSYSNOTMOUNTED; } + fclose(control_file); return ECGROUPNOTALLOWED; } + return errno; } fprintf(control_file, "%s", val); + fclose(control_file); return 0; } -/* - * WARNING: This API is not final. It WILL change format to use - * struct cgroup. This API will then become internal and be called something - * else. - * - * I am still not happy with how the data structure is looking at the moment, - * plus there are a couple of additional details to be worked out. Please - * do not rely on this API. - * - * Be prepared to change the implementation later once it shifts to - * struct cgroup in the real alpha release. +/** cg_modify_cgroup modifies the cgroup control files. + * struct cgroup *cgroup: The name will be the cgroup to be modified. + * The values will be the values to be modified, those not mentioned + * in the structure will not be modified. * - * The final version is expected to be + * The uids cannot be modified yet. * - * int modify_cgroup(struct cgroup *original, struct cgroup *final); + * returns 0 on success. * - * where original is the cgroup which is to be modified and final is how it - * should look. - * - * Also this version is still at one level since we do not have - * multi-hierarchy support in kernel. The real alpha release should have this - * issue sorted out as well. */ -int cg_modify_cgroup(char *cgroup, struct control_value *values[], int n) +int cg_modify_cgroup(struct cgroup *cgroup) { char path[FILENAME_MAX], base[FILENAME_MAX]; int i; int error; - strcpy(base, MOUNT_POINT); - strcat(base, "/"); - strcat(base, cgroup); - strcat(base, "/"); - - for (i = 0; i < n; i++, strcpy(path, base)) { - strcat(path, values[i]->name); - error = cg_set_control_value(path, values[i]->value); - if (error) - goto err; + cg_build_path(cgroup->name, base); + + for (i = 0; i < CG_CONTROLLER_MAX && cgroup->controller[i]; + i++, strcpy(path, base)) { + int j; + for(j = 0; j < CG_NV_MAX && + cgroup->controller[i]->values[j]; + j++, strcpy(path, base)) { + strcat(path, cgroup->controller[i]->values[j]->name); + error = cg_set_control_value(path, + cgroup->controller[i]->values[j]->value); + if (error) + goto err; + } } return 0; err: @@ -239,109 +337,106 @@ err: } -/* - * WARNING: This API is not final. It WILL change format to use - * struct cgroup. This API will then become internal and be called something - * else. - * - * I am still not happy with how the data structure is looking at the moment, - * plus there are a couple of additional details to be worked out. Please - * do not rely on this API. +/** create_cgroup creates a new control group. + * struct cgroup *cgroup: The control group to be created * - * Be prepared to change the implementation later once it shifts to - * struct cgroup in the real alpha release. - * - * The final version is expected to be - * - * int create_cgroup(struct cgroup *group); - * - * where group is the group to be created - * - * Also this version is still at one level since we do not have - * multi-hierarchy support in kernel. The real alpha release should have this - * issue sorted out as well. + * returns 0 on success. We recommend calling cg_delete_cgroup + * if this routine fails. That should do the cleanup operation. */ -int cg_create_cgroup(char *cgroup, struct control_value *values[], int n) +int cg_create_cgroup(struct cgroup *cgroup, int ignore_ownership) { - char path[FILENAME_MAX], base[FILENAME_MAX]; + char *fts_path[2], base[FILENAME_MAX], *path; int i; int error; - if (MOUNT_POINT == NULL) - return ECGROUPNOTMOUNTED; - - strcpy(path, MOUNT_POINT); - strcat(path, "/"); - strcat(path, cgroup); + fts_path[0] = (char *)malloc(FILENAME_MAX); + if (!fts_path[0]) + return ENOMEM; + fts_path[1] = NULL; + path = fts_path[0]; + cg_build_path(cgroup->name, path); error = cg_create_control_group(path); - strcat(path, "/"); + if (error) + goto err; + strcpy(base, path); - for (i = 0; i < n; i++, strcpy(path, base)) { - strcat(path, values[i]->name); - error = cg_set_control_value(path, values[i]->value); - if (!error) - return error; + if (!ignore_ownership) + error = cg_chown_recursive(fts_path, cgroup->control_uid, + cgroup->control_gid); + + if (error) + goto err; + + for (i = 0; i < CG_CONTROLLER_MAX && cgroup->controller[i]; + i++, strcpy(path, base)) { + int j; + for(j = 0; j < CG_NV_MAX && cgroup->controller[i]->values[j]; + j++, strcpy(path, base)) { + strcat(path, cgroup->controller[i]->values[j]->name); + error = cg_set_control_value(path, + cgroup->controller[i]->values[j]->value); + + /* + * Should we undo, what we've done in the loops above? + */ + if (error) + goto err; + } + } + + if (!ignore_ownership) { + strcpy(path, base); + strcat(path, "/tasks"); + chown(path, cgroup->tasks_uid, cgroup->tasks_gid); } +err: + free(path); return error; } -/* - * WARNING: This API is not final. It WILL change format to use - * struct cgroup. This API will then become internal and be called something - * else. - * - * I am still not happy with how the data structure is looking at the moment, - * plus there are a couple of additional details to be worked out. Please - * do not rely on this API. - * - * Be prepared to change the implementation later once it shifts to - * struct cgroup in the real alpha release. - * - * The final version is expected to be - * - * int delete_cgroup(struct cgroup *group); - * - * where group is the group to be deleted. +/** cg_delete cgroup deletes a control group. + * struct cgroup *cgroup takes the group which is to be deleted. * - * Also this version is still at one level since we do not have - * multi-hierarchy support in kernel. The real alpha release should have this - * issue sorted out as well. + * returns 0 on success. */ -int cg_delete_cgroup(char *cgroup) +int cg_delete_cgroup(struct cgroup *cgroup, int ignore_migration) { FILE *delete_tasks, *base_tasks; int tids; char path[FILENAME_MAX]; - int error; + int error = ECGROUPNOTALLOWED; strcpy(path, MOUNT_POINT); - strcat(path,"/tasks"); + strcat(path,"tasks"); base_tasks = fopen(path, "w"); + if (!base_tasks) + goto base_open_err; - strcpy(path, MOUNT_POINT); - strcat(path, "/"); - strcat(path, cgroup); - strcat(path,"/tasks"); + cg_build_path(cgroup->name, path); + strcat(path,"tasks"); delete_tasks = fopen(path, "r"); + if (!delete_tasks) + goto del_open_err; while (!feof(delete_tasks)) { fscanf(delete_tasks, "%d", &tids); fprintf(base_tasks, "%d", tids); } - strcpy(path, MOUNT_POINT); - strcat(path, "/"); - strcat(path, cgroup); - + cg_build_path(cgroup->name, path); error = rmdir(path); - if (!error) { - return ECGROUPNOTALLOWED; - } - + fclose(delete_tasks); +del_open_err: + fclose(base_tasks); +base_open_err: + if (ignore_migration) { + cg_build_path(cgroup->name, path); + error = rmdir(path); + } return error; } |