1 files changed, 216 insertions, 121 deletions
diff --git a/api.c b/api.c
index f842040..9f144a3 100644
--- a/api.c
+++ b/api.c
@@ -2,6 +2,7 @@
  * Copyright IBM Corporation. 2007
  *
  * Author:	Dhaval Giani <dhaval@linux.vnet.ibm.com>
+ * Author:	Balbir Singh <balbir@linux.vnet.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2.1 of the GNU Lesser General Public License
@@ -31,24 +32,113 @@
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <unistd.h>
+#include <fts.h>
 
+/*
+ * Remember to bump this up for major API changes.
+ */
+const static char cg_version[] = "0.01";
+
+/*
+ * Only one mount point is currently supported. This will be enhanced to
+ * support several hierarchies in the future
+ */
 static char MOUNT_POINT[FILENAME_MAX];
 
+static int cg_chown_file(FTS *fts, FTSENT *ent, uid_t owner, gid_t group)
+{
+	int ret = 0;
+	const char *filename = fts->fts_path;
+	dbg("seeing file %s\n", filename);
+	switch (ent->fts_info) {
+	case FTS_ERR:
+		errno = ent->fts_errno;
+		break;
+	case FTS_D:
+	case FTS_DC:
+	case FTS_NSOK:
+	case FTS_NS:
+	case FTS_DNR:
+	case FTS_DP:
+	case FTS_F:
+	case FTS_DEFAULT:
+		ret = chown(filename, owner, group);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * TODO: Need to decide a better place to put this function.
+ */
+static int cg_chown_recursive(char **path, uid_t owner, gid_t group)
+{
+	int ret = 0;
+	dbg("path is %s\n", *path);
+	FTS *fts = fts_open(path, FTS_PHYSICAL | FTS_NOCHDIR |
+				FTS_NOSTAT, NULL);
+	while (1) {
+		FTSENT *ent;
+		ent = fts_read(fts);
+		if (!ent) {
+			dbg("fts_read failed\n");
+			break;
+		}
+		ret = cg_chown_file(fts, ent, owner, group);
+	}
+	fts_close(fts);
+	return ret;
+}
+
+/**
+ * cg_init(), initializes the MOUNT_POINT.
+ * This code is not currently thread safe (hint: getmntent is not thread safe).
+ * This API is likely to change in the future to push state back to the caller
+ * to achieve thread safety. The code currently supports just one mount point.
+ * Complain if the cgroup filesystem controllers are bound to different mount
+ * points.
+ */
 int cg_init()
 {
 	FILE *proc_mount;
-	struct mntent *ent;
+	struct mntent *ent, *found_ent = NULL;
+	int found_mnt = 0;
+	int ret = 0;
 
 	proc_mount = fopen("/proc/mounts", "r");
-	ent = getmntent(proc_mount);
+	if (proc_mount == NULL) {
+		return EIO;
+	}
 
-	while (strcmp(ent->mnt_fsname,"cgroup") != 0) {
-		ent = getmntent(proc_mount);
-		if (ent == NULL)
-			return ECGROUPNOTMOUNTED;
+	while ((ent = getmntent(proc_mount)) != NULL) {
+		if (!strncmp(ent->mnt_fsname,"cgroup", strlen("cgroup"))) {
+			found_ent = ent;
+			found_mnt++;
+			dbg("Found cgroup option %s, count %d\n",
+				found_ent->mnt_opts, found_mnt);
+		}
 	}
-	strcpy(MOUNT_POINT, ent->mnt_dir);
-	return 0;
+
+	/*
+	 * Currently we require that all controllers be bound together
+	 */
+	if (!found_mnt)
+		ret = ECGROUPNOTMOUNTED;
+	else if (found_mnt > 1)
+		ret = ECGROUPMULTIMOUNTED;
+	else {
+		/*
+		 * NOTE: FILENAME_MAX ensures that we don't need to worry
+		 * about crossing MOUNT_POINT size. For the paranoid, yes
+		 * this is a potential security hole - Balbir
+		 * Dhaval - fix these things.
+		 */
+		strcpy(MOUNT_POINT, found_ent->mnt_dir);
+		strcat(MOUNT_POINT, "/");
+	}
+
+	fclose(proc_mount);
+	return ret;
 }
 
 static int cg_test_mounted_fs()
@@ -67,6 +157,7 @@ static int cg_test_mounted_fs()
 		if (ent == NULL)
 			return 0;
 	}
+	fclose(proc_mount);
 	return 1;
 }
 
@@ -75,21 +166,33 @@ static inline pid_t cg_gettid()
 	return syscall(__NR_gettid);
 }
 
-/*
+static char* cg_build_path(char *name, char *path)
+{
+	strcpy(path, MOUNT_POINT);
+	strcat(path, name);
+	strcat(path, "/");
+	return path;
+}
+
+/** cg_attach_task_pid is used to assign tasks to a cgroup.
+ *  struct cgroup *cgroup: The cgroup to assign the thread to.
+ *  pid_t tid: The thread to be assigned to the cgroup.
+ *
+ *  returns 0 on success.
+ *  returns ECGROUPNOTOWNER if the caller does not have access to the cgroup.
+ *  returns ECGROUPNOTALLOWED for other causes of failure.
  */
-int cg_attach_task_pid(char *cgroup, pid_t tid)
+int cg_attach_task_pid(struct cgroup *cgroup, pid_t tid)
 {
 	char path[FILENAME_MAX];
 	FILE *tasks;
 
-	if (cgroup == NULL) {
-		cgroup = (char *) malloc(sizeof(char));
-		cgroup = "\0";
+	if (cgroup == NULL)
+		strcpy(path, MOUNT_POINT);
+	else {
+		cg_build_path(cgroup->name, path);
 	}
 
-	strcpy(path, MOUNT_POINT);
-	strcat(path, "/");
-	strcat(path, cgroup);
 	strcat(path, "/tasks");
 
 	tasks = fopen(path, "w");
@@ -102,17 +205,18 @@ int cg_attach_task_pid(char *cgroup, pid_t tid)
 		}
 	}
 	fprintf(tasks, "%d", tid);
+	fclose(tasks);
 
 	return 0;
 
 }
 
-/*
- * Used to attach the task to a control group.
+/** cg_attach_task is used to attach the current thread to a cgroup.
+ *  struct cgroup *cgroup: The cgroup to assign the current thread to.
  *
- * WARNING: Will change to use struct cgroup when it is implemented.
+ *  See cg_attach_task_pid for return values.
  */
-int cg_attach_task(char *cgroup)
+int cg_attach_task(struct cgroup *cgroup)
 {
 	pid_t tid = cg_gettid();
 	int error;
@@ -133,8 +237,8 @@ static int cg_create_control_group(char *path)
 	int error;
 	if (!cg_test_mounted_fs())
 		return ECGROUPNOTMOUNTED;
-	error = mkdir(path, 0700);
-	if (!error) {
+	error = mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+	if (error) {
 		switch(errno) {
 		case EPERM:
 			return ECGROUPNOTOWNER;
@@ -184,54 +288,48 @@ static int cg_set_control_value(char *path, char *val)
 				if (errno == ENOENT)
 					return ECGROUPSUBSYSNOTMOUNTED;
 			}
+			fclose(control_file);
 			return ECGROUPNOTALLOWED;
 		}
+		return errno;
 	}
 
 	fprintf(control_file, "%s", val);
+	fclose(control_file);
 	return 0;
 }
 
-/*
- * WARNING: This API is not final. It WILL change format to use
- * struct cgroup. This API will then become internal and be called something
- * else.
- *
- * I am still not happy with how the data structure is looking at the moment,
- * plus there are a couple of additional details to be worked out. Please
- * do not rely on this API.
- *
- * Be prepared to change the implementation later once it shifts to
- * struct cgroup in the real alpha release.
+/** cg_modify_cgroup modifies the cgroup control files.
+ * struct cgroup *cgroup: The name will be the cgroup to be modified.
+ * The values will be the values to be modified, those not mentioned
+ * in the structure will not be modified.
  *
- * The final version is expected to be
+ * The uids cannot be modified yet.
  *
- * int modify_cgroup(struct cgroup *original, struct cgroup *final);
+ * returns 0 on success.
  *
- * where original is the cgroup which is to be modified and final is how it
- * should look.
- *
- * Also this version is still at one level since we do not have
- * multi-hierarchy support in kernel. The real alpha release should have this
- * issue sorted out as well.
  */
 
-int cg_modify_cgroup(char *cgroup, struct control_value *values[], int n)
+int cg_modify_cgroup(struct cgroup *cgroup)
 {
 	char path[FILENAME_MAX], base[FILENAME_MAX];
 	int i;
 	int error;
 
-	strcpy(base, MOUNT_POINT);
-	strcat(base, "/");
-	strcat(base, cgroup);
-	strcat(base, "/");
-
-	for (i = 0; i < n; i++, strcpy(path, base)) {
-		strcat(path, values[i]->name);
-		error = cg_set_control_value(path, values[i]->value);
-		if (error)
-			goto err;
+	cg_build_path(cgroup->name, base);
+
+	for (i = 0; i < CG_CONTROLLER_MAX && cgroup->controller[i];
+						i++, strcpy(path, base)) {
+		int j;
+		for(j = 0; j < CG_NV_MAX &&
+			cgroup->controller[i]->values[j];
+			j++, strcpy(path, base)) {
+			strcat(path, cgroup->controller[i]->values[j]->name);
+			error = cg_set_control_value(path,
+				cgroup->controller[i]->values[j]->value);
+			if (error)
+				goto err;
+		}
 	}
 	return 0;
 err:
@@ -239,109 +337,106 @@ err:
 
 }
 
-/*
- * WARNING: This API is not final. It WILL change format to use
- * struct cgroup. This API will then become internal and be called something
- * else.
- *
- * I am still not happy with how the data structure is looking at the moment,
- * plus there are a couple of additional details to be worked out. Please
- * do not rely on this API.
+/** create_cgroup creates a new control group.
+ * struct cgroup *cgroup: The control group to be created
  *
- * Be prepared to change the implementation later once it shifts to
- * struct cgroup in the real alpha release.
- *
- * The final version is expected to be
- *
- * int create_cgroup(struct cgroup *group);
- *
- * where group is the group to be created
- *
- * Also this version is still at one level since we do not have
- * multi-hierarchy support in kernel. The real alpha release should have this
- * issue sorted out as well.
+ * returns 0 on success. We recommend calling cg_delete_cgroup
+ * if this routine fails. That should do the cleanup operation.
  */
-int cg_create_cgroup(char *cgroup, struct control_value *values[], int n)
+int cg_create_cgroup(struct cgroup *cgroup, int ignore_ownership)
 {
-	char path[FILENAME_MAX], base[FILENAME_MAX];
+	char *fts_path[2], base[FILENAME_MAX], *path;
 	int i;
 	int error;
 
-	if (MOUNT_POINT == NULL)
-		return ECGROUPNOTMOUNTED;
-
-	strcpy(path, MOUNT_POINT);
-	strcat(path, "/");
-	strcat(path, cgroup);
+	fts_path[0] = (char *)malloc(FILENAME_MAX);
+	if (!fts_path[0])
+		return ENOMEM;
+	fts_path[1] = NULL;
+	path = fts_path[0];
 
+	cg_build_path(cgroup->name, path);
 	error = cg_create_control_group(path);
-	strcat(path, "/");
+	if (error)
+		goto err;
+
 	strcpy(base, path);
 
-	for (i = 0; i < n; i++, strcpy(path, base)) {
-		strcat(path, values[i]->name);
-		error = cg_set_control_value(path, values[i]->value);
-		if (!error)
-			return error;
+	if (!ignore_ownership)
+		error = cg_chown_recursive(fts_path, cgroup->control_uid,
+						cgroup->control_gid);
+
+	if (error)
+		goto err;
+
+	for (i = 0; i < CG_CONTROLLER_MAX && cgroup->controller[i];
+						i++, strcpy(path, base)) {
+		int j;
+		for(j = 0; j < CG_NV_MAX && cgroup->controller[i]->values[j];
+						j++, strcpy(path, base)) {
+			strcat(path, cgroup->controller[i]->values[j]->name);
+			error = cg_set_control_value(path,
+				cgroup->controller[i]->values[j]->value);
+
+			/*
+ 			 * Should we undo, what we've done in the loops above?
+ 			 */
+			if (error)
+				goto err;
+		}
+	}
+
+	if (!ignore_ownership) {
+		strcpy(path, base);
+		strcat(path, "/tasks");
+		chown(path, cgroup->tasks_uid, cgroup->tasks_gid);
 	}
+err:
+	free(path);
 	return error;
 }
 
-/*
- * WARNING: This API is not final. It WILL change format to use
- * struct cgroup. This API will then become internal and be called something
- * else.
- *
- * I am still not happy with how the data structure is looking at the moment,
- * plus there are a couple of additional details to be worked out. Please
- * do not rely on this API.
- *
- * Be prepared to change the implementation later once it shifts to
- * struct cgroup in the real alpha release.
- *
- * The final version is expected to be
- *
- * int delete_cgroup(struct cgroup *group);
- *
- * where group is the group to be deleted.
+/** cg_delete cgroup deletes a control group.
+ *  struct cgroup *cgroup takes the group which is to be deleted.
  *
- * Also this version is still at one level since we do not have
- * multi-hierarchy support in kernel. The real alpha release should have this
- * issue sorted out as well.
+ *  returns 0 on success.
  */
-int cg_delete_cgroup(char *cgroup)
+int cg_delete_cgroup(struct cgroup *cgroup, int ignore_migration)
 {
 	FILE *delete_tasks, *base_tasks;
 	int tids;
 	char path[FILENAME_MAX];
-	int error;
+	int error = ECGROUPNOTALLOWED;
 
 	strcpy(path, MOUNT_POINT);
-	strcat(path,"/tasks");
+	strcat(path,"tasks");
 
 	base_tasks = fopen(path, "w");
+	if (!base_tasks)
+		goto base_open_err;
 
-	strcpy(path, MOUNT_POINT);
-	strcat(path, "/");
-	strcat(path, cgroup);
-	strcat(path,"/tasks");
+	cg_build_path(cgroup->name, path);
+	strcat(path,"tasks");
 
 	delete_tasks = fopen(path, "r");
+	if (!delete_tasks)
+		goto del_open_err;
 
 	while (!feof(delete_tasks)) {
 		fscanf(delete_tasks, "%d", &tids);
 		fprintf(base_tasks, "%d", tids);
 	}
 
-	strcpy(path, MOUNT_POINT);
-	strcat(path, "/");
-	strcat(path, cgroup);
-
+	cg_build_path(cgroup->name, path);
 	error = rmdir(path);
 
-	if (!error) {
-			return ECGROUPNOTALLOWED;
-		}
-
+	fclose(delete_tasks);
+del_open_err:
+	fclose(base_tasks);
+base_open_err:
+	if (ignore_migration) {
+		cg_build_path(cgroup->name, path);
+		error = rmdir(path);
+	}
 	return error;
 }