1 files changed, 0 insertions, 465 deletions
diff --git a/epoll-limit-paths.patch b/epoll-limit-paths.patch
deleted file mode 100644
index 4ed306af4..000000000
--- a/epoll-limit-paths.patch
+++ /dev/null
@@ -1,465 +0,0 @@
-From 0a6cc45426fe3baaf231efd7efe4300fd879efc8 Mon Sep 17 00:00:00 2001
-From: Jason Baron <jbaron@redhat.com>
-Date: Mon, 24 Oct 2011 14:59:02 +1100
-Subject: [PATCH] epoll: limit paths
-
-epoll: limit paths
-
-The current epoll code can be tickled to run basically indefinitely in
-both loop detection path check (on ep_insert()), and in the wakeup paths.
-The programs that tickle this behavior set up deeply linked networks of
-epoll file descriptors that cause the epoll algorithms to traverse them
-indefinitely.  A couple of these sample programs have been previously
-posted in this thread: https://lkml.org/lkml/2011/2/25/297.
-
-To fix the loop detection path check algorithms, I simply keep track of
-the epoll nodes that have been already visited.  Thus, the loop detection
-becomes proportional to the number of epoll file descriptor and links.
-This dramatically decreases the run-time of the loop check algorithm.  In
-one diabolical case I tried it reduced the run-time from 15 mintues (all
-in kernel time) to .3 seconds.
-
-Fixing the wakeup paths could be done at wakeup time in a similar manner
-by keeping track of nodes that have already been visited, but the
-complexity is harder, since there can be multiple wakeups on different
-cpus...Thus, I've opted to limit the number of possible wakeup paths when
-the paths are created.
-
-This is accomplished, by noting that the end file descriptor points that
-are found during the loop detection pass (from the newly added link), are
-actually the sources for wakeup events.  I keep a list of these file
-descriptors and limit the number and length of these paths that emanate
-from these 'source file descriptors'.  In the current implemetation I
-allow 1000 paths of length 1, 500 of length 2, 100 of length 3, 50 of
-length 4 and 10 of length 5.  Note that it is sufficient to check the
-'source file descriptors' reachable from the newly added link, since no
-other 'source file descriptors' will have newly added links.  This allows
-us to check only the wakeup paths that may have gotten too long, and not
-re-check all possible wakeup paths on the system.
-
-In terms of the path limit selection, I think its first worth noting that
-the most common case for epoll, is probably the model where you have 1
-epoll file descriptor that is monitoring n number of 'source file
-descriptors'.  In this case, each 'source file descriptor' has a 1 path of
-length 1.  Thus, I believe that the limits I'm proposing are quite
-reasonable and in fact may be too generous.  Thus, I'm hoping that the
-proposed limits will not prevent any workloads that currently work to
-fail.
-
-In terms of locking, I have extended the use of the 'epmutex' to all
-epoll_ctl add and remove operations.  Currently its only used in a subset
-of the add paths.  I need to hold the epmutex, so that we can correctly
-traverse a coherent graph, to check the number of paths.  I believe that
-this additional locking is probably ok, since its in the setup/teardown
-paths, and doesn't affect the running paths, but it certainly is going to
-add some extra overhead.  Also, worth noting is that the epmuex was
-recently added to the ep_ctl add operations in the initial path loop
-detection code using the argument that it was not on a critical path.
-
-Another thing to note here, is the length of epoll chains that is allowed.
-Currently, eventpoll.c defines:
-
-/* Maximum number of nesting allowed inside epoll sets */
-#define EP_MAX_NESTS 4
-
-This basically means that I am limited to a graph depth of 5 (EP_MAX_NESTS
-+ 1).  However, this limit is currently only enforced during the loop
-check detection code, and only when the epoll file descriptors are added
-in a certain order.  Thus, this limit is currently easily bypassed.  The
-newly added check for wakeup paths, stricly limits the wakeup paths to a
-length of 5, regardless of the order in which ep's are linked together.
-Thus, a side-effect of the new code is a more consistent enforcement of
-the graph depth.
-
-Thus far, I've tested this, using the sample programs previously
-mentioned, which now either return quickly or return -EINVAL.  I've also
-testing using the piptest.c epoll tester, which showed no difference in
-performance.  I've also created a number of different epoll networks and
-tested that they behave as expectded.
-
-I believe this solves the original diabolical test cases, while still
-preserving the sane epoll nesting.
-
-Signed-off-by: Jason Baron <jbaron@redhat.com>
-Cc: Nelson Elhage <nelhage@ksplice.com>
-Cc: Davide Libenzi <davidel@xmailserver.org>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
----
- fs/eventpoll.c            |  226 ++++++++++++++++++++++++++++++++++++++++-----
- include/linux/eventpoll.h |    1 +
- include/linux/fs.h        |    1 +
- 3 files changed, 203 insertions(+), 25 deletions(-)
-
-diff --git a/fs/eventpoll.c b/fs/eventpoll.c
-index b84fad9..414ac74 100644
---- a/fs/eventpoll.c
-+++ b/fs/eventpoll.c
-@@ -197,6 +197,12 @@ struct eventpoll {
- 
- 	/* The user that created the eventpoll descriptor */
- 	struct user_struct *user;
-+
-+	struct file *file;
-+
-+	/* used to optimize loop detection check */
-+	int visited;
-+	struct list_head visitedllink;
- };
- 
- /* Wait structure used by the poll hooks */
-@@ -255,6 +261,12 @@ static struct kmem_cache *epi_cache __read_mostly;
- /* Slab cache used to allocate "struct eppoll_entry" */
- static struct kmem_cache *pwq_cache __read_mostly;
- 
-+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-+LIST_HEAD(visited_list);
-+
-+/* Files with newly added links, which need a limit on emanating paths */
-+LIST_HEAD(tfile_check_list);
-+
- #ifdef CONFIG_SYSCTL
- 
- #include <linux/sysctl.h>
-@@ -276,6 +288,12 @@ ctl_table epoll_table[] = {
- };
- #endif /* CONFIG_SYSCTL */
- 
-+static const struct file_operations eventpoll_fops;
-+
-+static inline int is_file_epoll(struct file *f)
-+{
-+	return f->f_op == &eventpoll_fops;
-+}
- 
- /* Setup the structure that is used as key for the RB tree */
- static inline void ep_set_ffd(struct epoll_filefd *ffd,
-@@ -711,12 +729,6 @@ static const struct file_operations eventpoll_fops = {
- 	.llseek		= noop_llseek,
- };
- 
--/* Fast test to see if the file is an eventpoll file */
--static inline int is_file_epoll(struct file *f)
--{
--	return f->f_op == &eventpoll_fops;
--}
--
- /*
-  * This is called from eventpoll_release() to unlink files from the eventpoll
-  * interface. We need to have this facility to cleanup correctly files that are
-@@ -926,6 +938,96 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
- 	rb_insert_color(&epi->rbn, &ep->rbr);
- }
- 
-+
-+
-+#define PATH_ARR_SIZE 5
-+/* These are the number paths of length 1 to 5, that we are allowing to emanate
-+ * from a single file of interest. For example, we allow 1000 paths of length
-+ * 1, to emanate from each file of interest. This essentially represents the
-+ * potential wakeup paths, which need to be limited in order to avoid massive
-+ * uncontrolled wakeup storms. The common use case should be a single ep which
-+ * is connected to n file sources. In this case each file source has 1 path
-+ * of length 1. Thus, the numbers below should be more than sufficient.
-+ */
-+int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
-+int path_count[PATH_ARR_SIZE];
-+
-+static int path_count_inc(int nests)
-+{
-+	if (++path_count[nests] > path_limits[nests])
-+		return -1;
-+	return 0;
-+}
-+
-+static void path_count_init(void)
-+{
-+	int i;
-+
-+	for (i = 0; i < PATH_ARR_SIZE; i++)
-+		path_count[i] = 0;
-+}
-+
-+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
-+{
-+	int error = 0;
-+	struct file *file = priv;
-+	struct file *child_file;
-+	struct epitem *epi;
-+
-+	list_for_each_entry(epi, &file->f_ep_links, fllink) {
-+		child_file = epi->ep->file;
-+		if (is_file_epoll(child_file)) {
-+			if (list_empty(&child_file->f_ep_links)) {
-+				if (path_count_inc(call_nests)) {
-+					error = -1;
-+					break;
-+				}
-+			} else {
-+				error = ep_call_nested(&poll_loop_ncalls,
-+							EP_MAX_NESTS,
-+							reverse_path_check_proc,
-+							child_file, child_file,
-+							current);
-+			}
-+			if (error != 0)
-+				break;
-+		} else {
-+			printk(KERN_ERR "reverse_path_check_proc: "
-+				"file is not an ep!\n");
-+		}
-+	}
-+	return error;
-+}
-+
-+/**
-+ * reverse_path_check - The tfile_check_list is list of file *, which have
-+ *                      links that are proposed to be newly added. We need to
-+ *                      make sure that those added links don't add too many
-+ *                      paths such that we will spend all our time waking up
-+ *                      eventpoll objects.
-+ *
-+ * Returns: Returns zero if the proposed links don't create too many paths,
-+ *	    -1 otherwise.
-+ */
-+static int reverse_path_check(void)
-+{
-+	int length = 0;
-+	int error = 0;
-+	struct file *current_file;
-+
-+	/* let's call this for all tfiles */
-+	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
-+		length++;
-+		path_count_init();
-+		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
-+					reverse_path_check_proc, current_file,
-+					current_file, current);
-+		if (error)
-+			break;
-+	}
-+	return error;
-+}
-+
- /*
-  * Must be called with "mtx" held.
-  */
-@@ -987,6 +1089,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
- 	 */
- 	ep_rbtree_insert(ep, epi);
- 
-+	/* now check if we've created too many backpaths */
-+	error = -EINVAL;
-+	if (reverse_path_check())
-+		goto error_remove_epi;
-+
- 	/* We have to drop the new item inside our item list to keep track of it */
- 	spin_lock_irqsave(&ep->lock, flags);
- 
-@@ -1011,6 +1118,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
- 
- 	return 0;
- 
-+error_remove_epi:
-+	spin_lock(&tfile->f_lock);
-+	if (ep_is_linked(&epi->fllink))
-+		list_del_init(&epi->fllink);
-+	spin_unlock(&tfile->f_lock);
-+
-+	rb_erase(&epi->rbn, &ep->rbr);
-+
- error_unregister:
- 	ep_unregister_pollwait(ep, epi);
- 
-@@ -1275,18 +1390,35 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
- 	int error = 0;
- 	struct file *file = priv;
- 	struct eventpoll *ep = file->private_data;
-+	struct eventpoll *ep_tovisit;
- 	struct rb_node *rbp;
- 	struct epitem *epi;
- 
- 	mutex_lock_nested(&ep->mtx, call_nests + 1);
-+	ep->visited = 1;
-+	list_add(&ep->visitedllink, &visited_list);
- 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
- 		epi = rb_entry(rbp, struct epitem, rbn);
- 		if (unlikely(is_file_epoll(epi->ffd.file))) {
-+			ep_tovisit = epi->ffd.file->private_data;
-+			if (ep_tovisit->visited)
-+				continue;
- 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
--					       ep_loop_check_proc, epi->ffd.file,
--					       epi->ffd.file->private_data, current);
-+					ep_loop_check_proc, epi->ffd.file,
-+					ep_tovisit, current);
- 			if (error != 0)
- 				break;
-+		} else {
-+			/* if we've reached a file that is not associated with
-+			 * an ep, then then we need to check if the newly added
-+			 * links are going to add too many wakeup paths. We do
-+			 * this by adding it to the tfile_check_list, if it's
-+			 * not already there, and calling reverse_path_check()
-+			 * during ep_insert()
-+			 */
-+			if (list_empty(&epi->ffd.file->f_tfile_llink))
-+				list_add(&epi->ffd.file->f_tfile_llink,
-+					 &tfile_check_list);
- 		}
- 	}
- 	mutex_unlock(&ep->mtx);
-@@ -1307,8 +1439,30 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
-  */
- static int ep_loop_check(struct eventpoll *ep, struct file *file)
- {
--	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
-+	int ret;
-+	struct eventpoll *ep_cur, *ep_next;
-+
-+	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
- 			      ep_loop_check_proc, file, ep, current);
-+	/* clear visited list */
-+	list_for_each_entry_safe(ep_cur, ep_next, &visited_list, visitedllink) {
-+		ep_cur->visited = 0;
-+		list_del(&ep_cur->visitedllink);
-+	}
-+	return ret;
-+}
-+
-+static void clear_tfile_check_list(void)
-+{
-+	struct file *file;
-+
-+	/* first clear the tfile_check_list */
-+	while (!list_empty(&tfile_check_list)) {
-+		file = list_first_entry(&tfile_check_list, struct file,
-+					f_tfile_llink);
-+		list_del_init(&file->f_tfile_llink);
-+	}
-+	INIT_LIST_HEAD(&tfile_check_list);
- }
- 
- /*
-@@ -1316,8 +1470,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
-  */
- SYSCALL_DEFINE1(epoll_create1, int, flags)
- {
--	int error;
-+	int error, fd;
- 	struct eventpoll *ep = NULL;
-+	struct file *file;
- 
- 	/* Check the EPOLL_* constant for consistency.  */
- 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
-@@ -1334,11 +1489,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
- 	 * Creates all the items needed to setup an eventpoll file. That is,
- 	 * a file structure and a free file descriptor.
- 	 */
--	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
-+	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
-+	if (fd < 0) {
-+		error = fd;
-+		goto out_free_ep;
-+	}
-+	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
- 				 O_RDWR | (flags & O_CLOEXEC));
--	if (error < 0)
--		ep_free(ep);
--
-+	if (IS_ERR(file)) {
-+		error = PTR_ERR(file);
-+		goto out_free_fd;
-+	}
-+	fd_install(fd, file);
-+	ep->file = file;
-+	return fd;
-+
-+out_free_fd:
-+	put_unused_fd(fd);
-+out_free_ep:
-+	ep_free(ep);
- 	return error;
- }
- 
-@@ -1404,21 +1573,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
- 	/*
- 	 * When we insert an epoll file descriptor, inside another epoll file
- 	 * descriptor, there is the change of creating closed loops, which are
--	 * better be handled here, than in more critical paths.
-+	 * better be handled here, than in more critical paths. While we are
-+	 * checking for loops we also determine the list of files reachable
-+	 * and hang them on the tfile_check_list, so we can check that we
-+	 * haven't created too many possible wakeup paths.
- 	 *
--	 * We hold epmutex across the loop check and the insert in this case, in
--	 * order to prevent two separate inserts from racing and each doing the
--	 * insert "at the same time" such that ep_loop_check passes on both
--	 * before either one does the insert, thereby creating a cycle.
-+	 * We need to hold the epmutex across both ep_insert and ep_remove
-+	 * b/c we want to make sure we are looking at a coherent view of
-+	 * epoll network.
- 	 */
--	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
-+	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
- 		mutex_lock(&epmutex);
- 		did_lock_epmutex = 1;
--		error = -ELOOP;
--		if (ep_loop_check(ep, tfile) != 0)
--			goto error_tgt_fput;
- 	}
--
-+	if (op == EPOLL_CTL_ADD) {
-+		if (is_file_epoll(tfile)) {
-+			error = -ELOOP;
-+			if (ep_loop_check(ep, tfile) != 0)
-+				goto error_tgt_fput;
-+		} else
-+			list_add(&tfile->f_tfile_llink, &tfile_check_list);
-+	}
- 
- 	mutex_lock_nested(&ep->mtx, 0);
- 
-@@ -1437,6 +1612,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
- 			error = ep_insert(ep, &epds, tfile, fd);
- 		} else
- 			error = -EEXIST;
-+		clear_tfile_check_list();
- 		break;
- 	case EPOLL_CTL_DEL:
- 		if (epi)
-@@ -1455,7 +1631,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
- 	mutex_unlock(&ep->mtx);
- 
- error_tgt_fput:
--	if (unlikely(did_lock_epmutex))
-+	if (did_lock_epmutex)
- 		mutex_unlock(&epmutex);
- 
- 	fput(tfile);
-diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
-index f362733..657ab55 100644
---- a/include/linux/eventpoll.h
-+++ b/include/linux/eventpoll.h
-@@ -61,6 +61,7 @@ struct file;
- static inline void eventpoll_init_file(struct file *file)
- {
- 	INIT_LIST_HEAD(&file->f_ep_links);
-+	INIT_LIST_HEAD(&file->f_tfile_llink);
- }
- 
- 
-diff --git a/include/linux/fs.h b/include/linux/fs.h
-index ba98668..d393a68 100644
---- a/include/linux/fs.h
-+++ b/include/linux/fs.h
-@@ -985,6 +985,7 @@ struct file {
- #ifdef CONFIG_EPOLL
- 	/* Used by fs/eventpoll.c to link all the hooks to this file */
- 	struct list_head	f_ep_links;
-+	struct list_head	f_tfile_llink;
- #endif /* #ifdef CONFIG_EPOLL */
- 	struct address_space	*f_mapping;
- #ifdef CONFIG_DEBUG_WRITECOUNT
--- 
-1.7.6.4
-