Diffstat (limited to 'blk-loop-avoid-too-many-pending-work-IO.patch')
-rw-r--r--  blk-loop-avoid-too-many-pending-work-IO.patch | 106
 1 file changed, 106 insertions(+), 0 deletions(-)
diff --git a/blk-loop-avoid-too-many-pending-work-IO.patch b/blk-loop-avoid-too-many-pending-work-IO.patch
new file mode 100644
index 000000000..568fc6b9c
--- /dev/null
+++ b/blk-loop-avoid-too-many-pending-work-IO.patch
@@ -0,0 +1,106 @@
+From: Ming Lei <ming.lei@canonical.com>
+Date: Tue, 28 Apr 2015 14:40:49 -0400
+Subject: [PATCH] blk: loop: avoid too many pending work IO
+
+If there is too much pending per-work I/O, too many
+high-priority worker threads can be generated, and
+system performance can suffer.
+
+This patch limits the max pending per-work I/O to 32,
+and falls back to single queue mode when the max
+number is reached.
+
+This patch fixes a Fedora 22 live boot performance
+regression seen when booting from squashfs over dm
+on top of loop; the following reasons appear to be
+related to the problem:
+
+- unlike other filesystems (such as ext4), squashfs
+is a bit special: I observed that increasing the number
+of I/O jobs accessing files in squashfs improves I/O
+performance only a little, while it helps ext4 a lot
+
+- nested loop: both squashfs.img and ext3fs.img are mounted
+as loop block devices, and ext3fs.img lives inside the squashfs
+
+- during boot, lots of tasks may run concurrently
+
+Fixes: b5dd2f6047ca108001328aac0e8588edd15f1778
+Cc: stable@vger.kernel.org (v4.0)
+Reported-by: Justin M. Forbes <jforbes@fedoraproject.org>
+Tested-by: Justin M. Forbes <jforbes@fedoraproject.org>
+Signed-off-by: Ming Lei <ming.lei@canonical.com>
+---
+ drivers/block/loop.c | 19 +++++++++++++++++--
+ drivers/block/loop.h | 2 ++
+ 2 files changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/block/loop.c b/drivers/block/loop.c
+index d7173cb1ea76..4db0301420b0 100644
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -1425,13 +1425,24 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+ {
+ struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
++ struct loop_device *lo = cmd->rq->q->queuedata;
++ bool single_queue = !!(cmd->rq->cmd_flags & REQ_WRITE);
++
++ /*
++ * Fall back to single queue mode if the pending per-work
++ * I/O number reaches 32; otherwise too many high-priority
++ * worker threads may affect system performance, as reported
++ * in Fedora live booting from squashfs over loop.
++ */
++ if (atomic_read(&lo->pending_per_work_io) >= 32)
++ single_queue = true;
+
+ blk_mq_start_request(bd->rq);
+
+- if (cmd->rq->cmd_flags & REQ_WRITE) {
+- struct loop_device *lo = cmd->rq->q->queuedata;
++ if (single_queue) {
+ bool need_sched = true;
+
++ cmd->per_work_io = false;
+ spin_lock_irq(&lo->lo_lock);
+ if (lo->write_started)
+ need_sched = false;
+@@ -1443,6 +1454,8 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ if (need_sched)
+ queue_work(loop_wq, &lo->write_work);
+ } else {
++ cmd->per_work_io = true;
++ atomic_inc(&lo->pending_per_work_io);
+ queue_work(loop_wq, &cmd->read_work);
+ }
+
+@@ -1467,6 +1480,8 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
+ if (ret)
+ cmd->rq->errors = -EIO;
+ blk_mq_complete_request(cmd->rq);
++ if (cmd->per_work_io)
++ atomic_dec(&lo->pending_per_work_io);
+ }
+
+ static void loop_queue_write_work(struct work_struct *work)
+diff --git a/drivers/block/loop.h b/drivers/block/loop.h
+index 301c27f8323f..eb855f57a62d 100644
+--- a/drivers/block/loop.h
++++ b/drivers/block/loop.h
+@@ -57,6 +57,7 @@ struct loop_device {
+ struct list_head write_cmd_head;
+ struct work_struct write_work;
+ bool write_started;
++ atomic_t pending_per_work_io;
+ int lo_state;
+ struct mutex lo_ctl_mutex;
+
+@@ -68,6 +69,7 @@ struct loop_device {
+ struct loop_cmd {
+ struct work_struct read_work;
+ struct request *rq;
++ bool per_work_io;
+ struct list_head list;
+ };
+
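
The patch above is easiest to read as a gating scheme: reads normally get one work item each (fast, concurrent), writes are funnelled through a single serialized work item, and an atomic counter caps how many concurrent per-work reads may be in flight before reads, too, fall back to the single queue. Below is a minimal userspace C sketch of that scheme, for illustration only; it assumes nothing from the kernel tree, and the names in it (MAX_PENDING, dispatch, complete_cmd) are made up rather than taken from loop.c.

/* Userspace model of the per-work I/O gating in the patch above. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_PENDING 32  /* mirrors the >= 32 check in loop_queue_rq() */

static atomic_int pending_per_work_io;  /* in-flight concurrent requests */

enum dispatch_mode { DISPATCH_CONCURRENT, DISPATCH_SINGLE_QUEUE };

/* Decide how one request is dispatched, mimicking loop_queue_rq():
 * writes always serialize; reads serialize only once the counter
 * of in-flight per-work requests reaches MAX_PENDING. */
static enum dispatch_mode dispatch(bool is_write)
{
	bool single_queue = is_write;

	if (atomic_load(&pending_per_work_io) >= MAX_PENDING)
		single_queue = true;

	if (single_queue)
		return DISPATCH_SINGLE_QUEUE;

	atomic_fetch_add(&pending_per_work_io, 1);
	return DISPATCH_CONCURRENT;
}

/* Completion side, mimicking the atomic_dec() in loop_handle_cmd(). */
static void complete_cmd(enum dispatch_mode mode)
{
	if (mode == DISPATCH_CONCURRENT)
		atomic_fetch_sub(&pending_per_work_io, 1);
}

int main(void)
{
	enum dispatch_mode modes[40];
	int serialized = 0;

	/* 40 reads arrive at once: the first 32 each get their own
	 * work item; the remaining 8 fall back to the single queue. */
	for (int i = 0; i < 40; i++)
		modes[i] = dispatch(false);
	for (int i = 0; i < 40; i++) {
		if (modes[i] == DISPATCH_SINGLE_QUEUE)
			serialized++;
		complete_cmd(modes[i]);
	}
	printf("serialized %d of 40 reads\n", serialized);
	return 0;
}

The design point worth noting: the write path was already serialized (via lo->write_work) to preserve write ordering, so the new counter only bounds how far reads can fan out across high-priority worker threads before they join that same serialized path.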