From 2673b4cf5d59c3ee5e0c12f6d734d38770324dc4 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Sun, 29 Jan 2012 12:17:33 -0600
Subject: [PATCH 1/4] backing-dev: fix wakeup timer races with bdi_unregister()

While 7a401a972df8e18 ("backing-dev: ensure wakeup_timer is deleted")
addressed the problem of the bdi being freed with a queued wakeup
timer, there are other races that could happen if the wakeup timer
expires after/during bdi_unregister(), before bdi_destroy() is called.

wakeup_timer_fn() could attempt to wakeup a task which has already has
been freed, or could access a NULL bdi->dev via the wake_forker_thread
tracepoint.

Cc: <stable@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Reported-by: Chanho Min <chanho.min@lge.com>
Reviewed-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 mm/backing-dev.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7ba8feae11b8..dd8e2aafb07e 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -318,7 +318,7 @@ static void wakeup_timer_fn(unsigned long data)
 	if (bdi->wb.task) {
 		trace_writeback_wake_thread(bdi);
 		wake_up_process(bdi->wb.task);
-	} else {
+	} else if (bdi->dev) {
 		/*
 		 * When bdi tasks are inactive for long time, they are killed.
 		 * In this case we have to wake-up the forker thread which
@@ -584,6 +584,8 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
+	struct task_struct *task;
+
 	if (!bdi_cap_writeback_dirty(bdi))
 		return;
 
@@ -602,8 +604,13 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	 * Finally, kill the kernel thread. We don't need to be RCU
 	 * safe anymore, since the bdi is gone from visibility.
 	 */
-	if (bdi->wb.task)
-		kthread_stop(bdi->wb.task);
+	spin_lock_bh(&bdi->wb_lock);
+	task = bdi->wb.task;
+	bdi->wb.task = NULL;
+	spin_unlock_bh(&bdi->wb_lock);
+
+	if (task)
+		kthread_stop(task);
 }
 
 /*
@@ -623,7 +630,9 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
-	if (bdi->dev) {
+	struct device *dev = bdi->dev;
+
+	if (dev) {
 		bdi_set_min_ratio(bdi, 0);
 		trace_writeback_bdi_unregister(bdi);
 		bdi_prune_sb(bdi);
@@ -632,8 +641,12 @@ void bdi_unregister(struct backing_dev_info *bdi)
 		if (!bdi_cap_flush_forker(bdi))
 			bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
-		device_unregister(bdi->dev);
+
+		spin_lock_bh(&bdi->wb_lock);
 		bdi->dev = NULL;
+		spin_unlock_bh(&bdi->wb_lock);
+
+		device_unregister(dev);
 	}
 }
 EXPORT_SYMBOL(bdi_unregister);

From 15eb77a07c714ac80201abd0a9568888bcee6276 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 17 Jan 2012 11:18:56 -0600
Subject: [PATCH 2/4] writeback: fix NULL bdi->dev in trace
 writeback_single_inode

bdi_prune_sb() resets sb->s_bdi to default_backing_dev_info when the
tearing down the original bdi. Fix trace_writeback_single_inode to
use sb->s_bdi=default_backing_dev_info rather than bdi->dev=NULL for a
teared down bdi.

Cc: <stable@kernel.org>
Reported-by: Rabin Vincent <rabin@rab.in>
Tested-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 fs/fs-writeback.c                | 16 ++++++++--------
 include/trace/events/writeback.h |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f855916657ba..5b4a9362d5aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,14 +52,6 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
-/*
- * Include the creation of the trace points after defining the
- * wb_writeback_work structure so that the definition remains local to this
- * file.
- */
-#define CREATE_TRACE_POINTS
-#include <trace/events/writeback.h>
-
 /*
  * We don't actually have pdflush, but this one is exported though /proc...
  */
@@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head)
 	return list_entry(head, struct inode, i_wb_list);
 }
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure and inline functions so that the definition
+ * remains local to this file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
 /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
 static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
 {
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 8588a8918023..06d302ebcb72 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -426,7 +426,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
 
 	TP_fast_assign(
 		strncpy(__entry->name,
-			dev_name(inode->i_mapping->backing_dev_info->dev), 32);
+			dev_name(inode_to_bdi(inode)->dev), 32);
 		__entry->ino		= inode->i_ino;
 		__entry->state		= inode->i_state;
 		__entry->dirtied_when	= inode->dirtied_when;

From 3310225dfc71a35a2cc9340c15c0e08b14b3c754 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 9 Jan 2012 11:53:50 -0600
Subject: [PATCH 3/4] lib: proportion: lower PROP_MAX_SHIFT to 32 on 64-bit
 kernel

PROP_MAX_SHIFT should be set to <=32 on 64-bit box. This fixes two bugs
in the below lines of bdi_dirty_limit():

	bdi_dirty *= numerator;
	do_div(bdi_dirty, denominator);

1) divide error: do_div() only uses the lower 32 bit of the denominator,
   which may trimmed to be 0 when PROP_MAX_SHIFT > 32.

2) overflow: (bdi_dirty * numerator) could easily overflow if numerator
   used up to 48 bits, leaving only 16 bits to bdi_dirty

Cc: <stable@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reported-by: Ilya Tumaykin <librarian_rus@yahoo.com>
Tested-by: Ilya Tumaykin <librarian_rus@yahoo.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/proportions.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index ef35bb73f69b..26a8a4ed9b07 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -81,7 +81,11 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  * Limit the time part in order to ensure there are some bits left for the
  * cycle counter and fraction multiply.
  */
+#if BITS_PER_LONG == 32
 #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
+#else
+#define PROP_MAX_SHIFT (BITS_PER_LONG/2)
+#endif
 
 #define PROP_FRAC_SHIFT		(BITS_PER_LONG - PROP_MAX_SHIFT - 1)
 #define PROP_FRAC_BASE		(1UL << PROP_FRAC_SHIFT)

From 977b7e3a52a7421ad33a393a38ece59f3d41c2fa Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 4 Feb 2012 20:54:03 -0600
Subject: [PATCH 4/4] writeback: fix dereferencing NULL bdi->dev on
 trace_writeback_queue

When a SD card is hot removed without umount, del_gendisk() will call
bdi_unregister() without destroying/freeing it. This leaves the bdi in
the bdi->dev = NULL, bdi->wb.task = NULL, bdi->bdi_list removed state.

When sync(2) gets the bdi before bdi_unregister() and calls
bdi_queue_work() after the unregister, trace_writeback_queue will be
dereferencing the NULL bdi->dev. Fix it with a simple test for NULL.

LKML-reference: http://lkml.org/lkml/2012/1/18/346
Cc: stable@kernel.org
Reported-by: Rabin Vincent <rabin@rab.in>
Tested-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/trace/events/writeback.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 06d302ebcb72..5973410e8f8c 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -47,7 +47,10 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__field(int, reason)
 	),
 	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		struct device *dev = bdi->dev;
+		if (!dev)
+			dev = default_backing_dev_info.dev;
+		strncpy(__entry->name, dev_name(dev), 32);
 		__entry->nr_pages = work->nr_pages;
 		__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
 		__entry->sync_mode = work->sync_mode;