Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (107 commits) vfs: use ERR_CAST for err-ptr tossing in lookup_instantiate_filp isofs: Remove global fs lock jffs2: fix IN_DELETE_SELF on overwriting rename() killing a directory fix IN_DELETE_SELF on overwriting rename() on ramfs et.al. mm/truncate.c: fix build for CONFIG_BLOCK not enabled fs:update the NOTE of the file_operations structure Remove dead code in dget_parent() AFS: Fix silly characters in a comment switch d_add_ci() to d_splice_alias() in "found negative" case as well simplify gfs2_lookup() jfs_lookup(): don't bother with . or .. get rid of useless dget_parent() in btrfs rename() and link() get rid of useless dget_parent() in fs/btrfs/ioctl.c fs: push i_mutex and filemap_write_and_wait down into ->fsync() handlers drivers: fix up various ->llseek() implementations fs: handle SEEK_HOLE/SEEK_DATA properly in all fs's that define their own llseek Ext4: handle SEEK_HOLE/SEEK_DATA generically Btrfs: implement our own ->llseek fs: add SEEK_HOLE and SEEK_DATA flags reiserfs: make reiserfs default to barrier=flush ... Fix up trivial conflicts in fs/xfs/linux-2.6/xfs_super.c due to the new shrinker callout for the inode cache, that clashed with the xfs code to start the periodic workers later.
2011-07-22 19:02:39 -07:00 · 2011-07-22 19:02:39 -07:00 · bbd9d6f7fb
commit bbd9d6f7fb
parent 8e204874db 5a9a43646c
235 changed files with 2512 additions and 1943 deletions
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@ -52,7 +52,7 @@ ata *);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, unsigned int);
-	int (*check_acl)(struct inode *, int, unsigned int);
+	int (*check_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@ -412,7 +412,7 @@ prototypes:
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
 	int (*release) (struct inode *, struct file *);
-	int (*fsync) (struct file *, int datasync);
+	int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
 	int (*aio_fsync) (struct kiocb *, int datasync);
 	int (*fasync) (int, struct file *, int);
 	int (*lock) (struct file *, int, struct file_lock *);
@ -438,9 +438,7 @@ prototypes:

 locking rules:
 	All may block except for ->setlease.
-	No VFS locks held on entry except for ->fsync and ->setlease.
-
->fsync() has i_mutex on inode.
+	No VFS locks held on entry except for ->setlease.

 ->setlease has the file_list_lock held and must not sleep.

--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@ -398,12 +398,33 @@ Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
 so the i_size should not change when hole punching, even when puching the end of
 a file off.

--
-[mandatory]
-
 --
 [mandatory]
 	->get_sb() is gone.  Switch to use of ->mount().  Typically it's just
 a matter of switching from calling get_sb_... to mount_... and changing the
 function type.  If you were doing it manually, just switch from setting ->mnt_root
 to some pointer to returning that pointer.  On errors return ERR_PTR(...).
+
+--
+[mandatory]
+	->permission(), generic_permission() and ->check_acl() have lost flags
+argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask.
+	generic_permission() has also lost the check_acl argument; if you want
+non-NULL to be used for that inode, put it into ->i_op->check_acl.
+
+--
+[mandatory]
+	If you implement your own ->llseek() you must handle SEEK_HOLE and
+SEEK_DATA.  You can hanle this by returning -EINVAL, but it would be nicer to
+support it in some way.  The generic handler assumes that the entire file is
+data and there is a virtual hole at the end of the file.  So if the provided
+offset is less than i_size and SEEK_DATA is specified, return the same offset.
+If the above is true for the offset and you are given SEEK_HOLE, return the end
+of the file.  If the offset is i_size or greater return -ENXIO in either case.
+
+[mandatory]
+	If you have your own ->fsync() you must make sure to call
+filemap_write_and_wait_range() so that all dirty pages are synced out properly.
+You must also keep in mind that ->fsync() is not called with i_mutex held
+anymore, so if you require i_mutex locking you must make sure to take it and
+release it yourself.
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@ -229,6 +229,8 @@ struct super_operations {

        ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+	int (*nr_cached_objects)(struct super_block *);
+	void (*free_cached_objects)(struct super_block *, int);
 };

 All methods are called without any locks being held, unless otherwise
@ -301,6 +303,26 @@ or bottom half).

  quota_write: called by the VFS to write to filesystem quota file.

+  nr_cached_objects: called by the sb cache shrinking function for the
+	filesystem to return the number of freeable cached objects it contains.
+	Optional.
+
+  free_cache_objects: called by the sb cache shrinking function for the
+	filesystem to scan the number of objects indicated to try to free them.
+	Optional, but any filesystem implementing this method needs to also
+	implement ->nr_cached_objects for it to be called correctly.
+
+	We can't do anything with any errors that the filesystem might
+	encountered, hence the void return type. This will never be called if
+	the VM is trying to reclaim under GFP_NOFS conditions, hence this
+	method does not need to handle that situation itself.
+
+	Implementations must include conditional reschedule calls inside any
+	scanning loop that is done. This allows the VFS to determine
+	appropriate scan batch sizes without having to worry about whether
+	implementations will cause holdoff problems due to large scan batch
+	sizes.
+
 Whoever sets up the inode is responsible for filling in the "i_op" field. This
 is a pointer to a "struct inode_operations" which describes the methods that
 can be performed on individual inodes.
@ -333,8 +355,8 @@ struct inode_operations {
        void * (*follow_link) (struct dentry *, struct nameidata *);
        void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
-	int (*permission) (struct inode *, int, unsigned int);
-	int (*check_acl)(struct inode *, int, unsigned int);
+	int (*permission) (struct inode *, int);
+	int (*check_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@ -423,7 +445,7 @@ otherwise noted.
  permission: called by the VFS to check for access rights on a POSIX-like
  	filesystem.

-	May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
+	May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
        mode, the filesystem must check the permission without blocking or
 	storing to the inode.

@ -755,7 +777,7 @@ struct file_operations {
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
 	int (*release) (struct inode *, struct file *);
-	int (*fsync) (struct file *, int datasync);
+	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
 	int (*aio_fsync) (struct kiocb *, int datasync);
 	int (*fasync) (int, struct file *, int);
 	int (*lock) (struct file *, int, struct file_lock *);
--- a/arch/arm/mach-tegra/clock.c
+++ b/arch/arm/mach-tegra/clock.c
@ -585,7 +585,7 @@ static const struct file_operations possible_parents_fops = {

 static int clk_debugfs_register_one(struct clk *c)
 {
-	struct dentry *d, *child, *child_tmp;
+	struct dentry *d;

 	d = debugfs_create_dir(c->name, clk_debugfs_root);
 	if (!d)
@ -614,10 +614,7 @@ static int clk_debugfs_register_one(struct clk *c)
 	return 0;

 err_out:
-	d = c->dent;
-	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(c->dent);
+	debugfs_remove_recursive(c->dent);
 	return -ENOMEM;
 }

--- a/arch/arm/mach-ux500/clock.c
+++ b/arch/arm/mach-ux500/clock.c
@ -635,16 +635,13 @@ static const struct file_operations set_rate_fops = {
 static struct dentry *clk_debugfs_register_dir(struct clk *c,
 						struct dentry *p_dentry)
 {
-	struct dentry *d, *clk_d, *child, *child_tmp;
-	char s[255];
-	char *p = s;
+	struct dentry *d, *clk_d;
+	const char *p = c->name;

-	if (c->name == NULL)
-		p += sprintf(p, "BUG");
-	else
-		p += sprintf(p, "%s", c->name);
+	if (!p)
+		p = "BUG";

-	clk_d = debugfs_create_dir(s, p_dentry);
+	clk_d = debugfs_create_dir(p, p_dentry);
 	if (!clk_d)
 		return NULL;

@ -666,24 +663,10 @@ static struct dentry *clk_debugfs_register_dir(struct clk *c,
 	return clk_d;

 err_out:
-	d = clk_d;
-	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(clk_d);
+	debugfs_remove_recursive(clk_d);
 	return NULL;
 }

-static void clk_debugfs_remove_dir(struct dentry *cdentry)
-{
-	struct dentry *d, *child, *child_tmp;
-
-	d = cdentry;
-	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(cdentry);
-	return ;
-}
-
 static int clk_debugfs_register_one(struct clk *c)
 {
 	struct clk *pa = c->parent_periph;
@ -700,7 +683,7 @@ static int clk_debugfs_register_one(struct clk *c)
 		c->dent_bus = clk_debugfs_register_dir(c,
 				bpa->dent_bus ? bpa->dent_bus : bpa->dent);
 		if ((!c->dent_bus) &&  (c->dent)) {
-			clk_debugfs_remove_dir(c->dent);
+			debugfs_remove_recursive(c->dent);
 			c->dent = NULL;
 			return -ENOMEM;
 		}
--- a/arch/arm/plat-omap/clock.c
+++ b/arch/arm/plat-omap/clock.c
@ -480,13 +480,10 @@ static struct dentry *clk_debugfs_root;
 static int clk_debugfs_register_one(struct clk *c)
 {
 	int err;
-	struct dentry *d, *child, *child_tmp;
+	struct dentry *d;
 	struct clk *pa = c->parent;
-	char s[255];
-	char *p = s;

-	p += sprintf(p, "%s", c->name);
-	d = debugfs_create_dir(s, pa ? pa->dent : clk_debugfs_root);
+	d = debugfs_create_dir(c->name, pa ? pa->dent : clk_debugfs_root);
 	if (!d)
 		return -ENOMEM;
 	c->dent = d;
@ -509,10 +506,7 @@ static int clk_debugfs_register_one(struct clk *c)
 	return 0;

 err_out:
-	d = c->dent;
-	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(c->dent);
+	debugfs_remove_recursive(c->dent);
 	return err;
 }

--- a/arch/arm/plat-samsung/clock.c
+++ b/arch/arm/plat-samsung/clock.c
@ -458,7 +458,7 @@ static struct dentry *clk_debugfs_root;
 static int clk_debugfs_register_one(struct clk *c)
 {
 	int err;
-	struct dentry *d, *child, *child_tmp;
+	struct dentry *d;
 	struct clk *pa = c->parent;
 	char s[255];
 	char *p = s;
@ -488,10 +488,7 @@ static int clk_debugfs_register_one(struct clk *c)
 	return 0;

 err_out:
-	d = c->dent;
-	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(c->dent);
+	debugfs_remove_recursive(c->dent);
 	return err;
 }

--- a/arch/arm/plat-spear/clock.c
+++ b/arch/arm/plat-spear/clock.c
@ -916,7 +916,7 @@ static struct dentry *clk_debugfs_root;
 static int clk_debugfs_register_one(struct clk *c)
 {
 	int err;
-	struct dentry *d, *child;
+	struct dentry *d;
 	struct clk *pa = c->pclk;
 	char s[255];
 	char *p = s;
@ -951,10 +951,7 @@ static int clk_debugfs_register_one(struct clk *c)
 	return 0;

 err_out:
-	d = c->dent;
-	list_for_each_entry(child, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(c->dent);
+	debugfs_remove_recursive(c->dent);
 	return err;
 }

--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@ -1850,9 +1850,16 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
 	return ret;
 }

-static int spufs_mfc_fsync(struct file *file, int datasync)
+static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	return spufs_mfc_flush(file, NULL);
+	struct inode *inode = file->f_path.dentry->d_inode;
+	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (!err) {
+		mutex_lock(&inode->i_mutex);
+		err = spufs_mfc_flush(file, NULL);
+		mutex_unlock(&inode->i_mutex);
+	}
+	return err;
 }

 static int spufs_mfc_fasync(int fd, struct file *file, int on)
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@ -611,15 +611,14 @@ static int spufs_create_gang(struct inode *inode,

 static struct file_system_type spufs_type;

-long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
-							struct file *filp)
+long spufs_create(struct path *path, struct dentry *dentry,
+		unsigned int flags, mode_t mode, struct file *filp)
 {
-	struct dentry *dentry;
 	int ret;

 	ret = -EINVAL;
 	/* check if we are on spufs */
-	if (nd->path.dentry->d_sb->s_type != &spufs_type)
+	if (path->dentry->d_sb->s_type != &spufs_type)
 		goto out;

 	/* don't accept undefined flags */
@ -627,33 +626,27 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
 		goto out;

 	/* only threads can be underneath a gang */
-	if (nd->path.dentry != nd->path.dentry->d_sb->s_root) {
+	if (path->dentry != path->dentry->d_sb->s_root) {
 		if ((flags & SPU_CREATE_GANG) ||
-		    !SPUFS_I(nd->path.dentry->d_inode)->i_gang)
+		    !SPUFS_I(path->dentry->d_inode)->i_gang)
 			goto out;
 	}

-	dentry = lookup_create(nd, 1);
-	ret = PTR_ERR(dentry);
-	if (IS_ERR(dentry))
-		goto out_dir;
-
 	mode &= ~current_umask();

 	if (flags & SPU_CREATE_GANG)
-		ret = spufs_create_gang(nd->path.dentry->d_inode,
-					 dentry, nd->path.mnt, mode);
+		ret = spufs_create_gang(path->dentry->d_inode,
+					 dentry, path->mnt, mode);
 	else
-		ret = spufs_create_context(nd->path.dentry->d_inode,
-					    dentry, nd->path.mnt, flags, mode,
+		ret = spufs_create_context(path->dentry->d_inode,
+					    dentry, path->mnt, flags, mode,
 					    filp);
 	if (ret >= 0)
-		fsnotify_mkdir(nd->path.dentry->d_inode, dentry);
+		fsnotify_mkdir(path->dentry->d_inode, dentry);
 	return ret;

-out_dir:
-	mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
 out:
+	mutex_unlock(&path->dentry->d_inode->i_mutex);
 	return ret;
 }

--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@ -248,7 +248,7 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
 /* system call implementation */
 extern struct spufs_calls spufs_calls;
 long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
-long spufs_create(struct nameidata *nd, unsigned int flags,
+long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
 			mode_t mode, struct file *filp);
 /* ELF coredump callbacks for writing SPU ELF notes */
 extern int spufs_coredump_extra_notes_size(void);
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@ -62,21 +62,17 @@ static long do_spu_run(struct file *filp,
 static long do_spu_create(const char __user *pathname, unsigned int flags,
 		mode_t mode, struct file *neighbor)
 {
-	char *tmp;
+	struct path path;
+	struct dentry *dentry;
 	int ret;

-	tmp = getname(pathname);
-	ret = PTR_ERR(tmp);
-	if (!IS_ERR(tmp)) {
-		struct nameidata nd;
-
-		ret = kern_path_parent(tmp, &nd);
-		if (!ret) {
-			nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
-			ret = spufs_create(&nd, flags, mode, neighbor);
-			path_put(&nd.path);
-		}
-		putname(tmp);
+	dentry = user_path_create(AT_FDCWD, pathname, &path, 1);
+	ret = PTR_ERR(dentry);
+	if (!IS_ERR(dentry)) {
+		ret = spufs_create(&path, dentry, flags, mode, neighbor);
+		mutex_unlock(&path.dentry->d_inode->i_mutex);
+		dput(dentry);
+		path_put(&path);
 	}

 	return ret;
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@ -21,12 +21,11 @@
 #include <linux/fs.h>
 #include <linux/shmem_fs.h>
 #include <linux/ramfs.h>
-#include <linux/cred.h>
 #include <linux/sched.h>
-#include <linux/init_task.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>

-static struct vfsmount *dev_mnt;
+static struct task_struct *thread;

 #if defined CONFIG_DEVTMPFS_MOUNT
 static int mount_dev = 1;
@ -34,7 +33,16 @@ static int mount_dev = 1;
 static int mount_dev;
 #endif

-static DEFINE_MUTEX(dirlock);
+static DEFINE_SPINLOCK(req_lock);
+
+static struct req {
+	struct req *next;
+	struct completion done;
+	int err;
+	const char *name;
+	mode_t mode;	/* 0 => delete */
+	struct device *dev;
+} *requests;

 static int __init mount_param(char *str)
 {
@ -68,49 +76,103 @@ static inline int is_blockdev(struct device *dev)
 static inline int is_blockdev(struct device *dev) { return 0; }
 #endif

-static int dev_mkdir(const char *name, mode_t mode)
+int devtmpfs_create_node(struct device *dev)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
-	int err;
+	const char *tmp = NULL;
+	struct req req;

-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      name, LOOKUP_PARENT, &nd);
-	if (err)
-		return err;
+	if (!thread)
+		return 0;

-	dentry = lookup_create(&nd, 1);
-	if (!IS_ERR(dentry)) {
-		err = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
-		if (!err)
-			/* mark as kernel-created inode */
-			dentry->d_inode->i_private = &dev_mnt;
-		dput(dentry);
-	} else {
-		err = PTR_ERR(dentry);
+	req.mode = 0;
+	req.name = device_get_devnode(dev, &req.mode, &tmp);
+	if (!req.name)
+		return -ENOMEM;
+
+	if (req.mode == 0)
+		req.mode = 0600;
+	if (is_blockdev(dev))
+		req.mode |= S_IFBLK;
+	else
+		req.mode |= S_IFCHR;
+
+	req.dev = dev;
+
+	init_completion(&req.done);
+
+	spin_lock(&req_lock);
+	req.next = requests;
+	requests = &req;
+	spin_unlock(&req_lock);
+
+	wake_up_process(thread);
+	wait_for_completion(&req.done);
+
+	kfree(tmp);
+
+	return req.err;
 }

-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_put(&nd.path);
+int devtmpfs_delete_node(struct device *dev)
+{
+	const char *tmp = NULL;
+	struct req req;
+
+	if (!thread)
+		return 0;
+
+	req.name = device_get_devnode(dev, NULL, &tmp);
+	if (!req.name)
+		return -ENOMEM;
+
+	req.mode = 0;
+	req.dev = dev;
+
+	init_completion(&req.done);
+
+	spin_lock(&req_lock);
+	req.next = requests;
+	requests = &req;
+	spin_unlock(&req_lock);
+
+	wake_up_process(thread);
+	wait_for_completion(&req.done);
+
+	kfree(tmp);
+	return req.err;
+}
+
+static int dev_mkdir(const char *name, mode_t mode)
+{
+	struct dentry *dentry;
+	struct path path;
+	int err;
+
+	dentry = kern_path_create(AT_FDCWD, name, &path, 1);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	err = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+	if (!err)
+		/* mark as kernel-created inode */
+		dentry->d_inode->i_private = &thread;
+	dput(dentry);
+	mutex_unlock(&path.dentry->d_inode->i_mutex);
+	path_put(&path);
 	return err;
 }

 static int create_path(const char *nodepath)
 {
-	int err;
-
-	mutex_lock(&dirlock);
-	err = dev_mkdir(nodepath, 0755);
-	if (err == -ENOENT) {
 	char *path;
 	char *s;
+	int err;

 	/* parent directories do not exist, create them */
 	path = kstrdup(nodepath, GFP_KERNEL);
-		if (!path) {
-			err = -ENOMEM;
-			goto out;
-		}
+	if (!path)
+		return -ENOMEM;
+
 	s = path;
 	for (;;) {
 		s = strchr(s, '/');
@ -124,51 +186,24 @@ static int create_path(const char *nodepath)
 		s++;
 	}
 	kfree(path);
-	}
-out:
-	mutex_unlock(&dirlock);
 	return err;
 }

-int devtmpfs_create_node(struct device *dev)
+static int handle_create(const char *nodename, mode_t mode, struct device *dev)
 {
-	const char *tmp = NULL;
-	const char *nodename;
-	const struct cred *curr_cred;
-	mode_t mode = 0;
-	struct nameidata nd;
 	struct dentry *dentry;
+	struct path path;
 	int err;

-	if (!dev_mnt)
-		return 0;
-
-	nodename = device_get_devnode(dev, &mode, &tmp);
-	if (!nodename)
-		return -ENOMEM;
-
-	if (mode == 0)
-		mode = 0600;
-	if (is_blockdev(dev))
-		mode |= S_IFBLK;
-	else
-		mode |= S_IFCHR;
-
-	curr_cred = override_creds(&init_cred);
-
-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      nodename, LOOKUP_PARENT, &nd);
-	if (err == -ENOENT) {
+	dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
+	if (dentry == ERR_PTR(-ENOENT)) {
 		create_path(nodename);
-		err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-				      nodename, LOOKUP_PARENT, &nd);
+		dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
 	}
-	if (err)
-		goto out;
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);

-	dentry = lookup_create(&nd, 0);
-	if (!IS_ERR(dentry)) {
-		err = vfs_mknod(nd.path.dentry->d_inode,
+	err = vfs_mknod(path.dentry->d_inode,
 			dentry, mode, dev->devt);
 	if (!err) {
 		struct iattr newattrs;
@ -181,18 +216,12 @@ int devtmpfs_create_node(struct device *dev)
 		mutex_unlock(&dentry->d_inode->i_mutex);

 		/* mark as kernel-created inode */
-			dentry->d_inode->i_private = &dev_mnt;
+		dentry->d_inode->i_private = &thread;
 	}
 	dput(dentry);
-	} else {
-		err = PTR_ERR(dentry);
-	}

-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_put(&nd.path);
-out:
-	kfree(tmp);
-	revert_creds(curr_cred);
+	mutex_unlock(&path.dentry->d_inode->i_mutex);
+	path_put(&path);
 	return err;
 }

@ -202,8 +231,7 @@ static int dev_rmdir(const char *name)
 	struct dentry *dentry;
 	int err;

-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      name, LOOKUP_PARENT, &nd);
+	err = kern_path_parent(name, &nd);
 	if (err)
 		return err;

@ -211,7 +239,7 @@ static int dev_rmdir(const char *name)
 	dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
 	if (!IS_ERR(dentry)) {
 		if (dentry->d_inode) {
-			if (dentry->d_inode->i_private == &dev_mnt)
+			if (dentry->d_inode->i_private == &thread)
 				err = vfs_rmdir(nd.path.dentry->d_inode,
 						dentry);
 			else
@ -238,7 +266,6 @@ static int delete_path(const char *nodepath)
 	if (!path)
 		return -ENOMEM;

-	mutex_lock(&dirlock);
 	for (;;) {
 		char *base;

@ -250,7 +277,6 @@ static int delete_path(const char *nodepath)
 		if (err)
 			break;
 	}
-	mutex_unlock(&dirlock);

 	kfree(path);
 	return err;
@ -259,7 +285,7 @@ static int delete_path(const char *nodepath)
 static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat)
 {
 	/* did we create it */
-	if (inode->i_private != &dev_mnt)
+	if (inode->i_private != &thread)
 		return 0;

 	/* does the dev_t match */
@ -277,29 +303,17 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta
 	return 1;
 }

-int devtmpfs_delete_node(struct device *dev)
+static int handle_remove(const char *nodename, struct device *dev)
 {
-	const char *tmp = NULL;
-	const char *nodename;
-	const struct cred *curr_cred;
 	struct nameidata nd;
 	struct dentry *dentry;
 	struct kstat stat;
 	int deleted = 1;
 	int err;

-	if (!dev_mnt)
-		return 0;
-
-	nodename = device_get_devnode(dev, NULL, &tmp);
-	if (!nodename)
-		return -ENOMEM;
-
-	curr_cred = override_creds(&init_cred);
-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      nodename, LOOKUP_PARENT, &nd);
+	err = kern_path_parent(nodename, &nd);
 	if (err)
-		goto out;
+		return err;

 	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
@ -337,9 +351,6 @@ int devtmpfs_delete_node(struct device *dev)
 	path_put(&nd.path);
 	if (deleted && strchr(nodename, '/'))
 		delete_path(nodename);
-out:
-	kfree(tmp);
-	revert_creds(curr_cred);
 	return err;
 }

@ -354,7 +365,7 @@ int devtmpfs_mount(const char *mntdir)
 	if (!mount_dev)
 		return 0;

-	if (!dev_mnt)
+	if (!thread)
 		return 0;

 	err = sys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, NULL);
@ -365,31 +376,79 @@ int devtmpfs_mount(const char *mntdir)
 	return err;
 }

+static __initdata DECLARE_COMPLETION(setup_done);
+
+static int handle(const char *name, mode_t mode, struct device *dev)
+{
+	if (mode)
+		return handle_create(name, mode, dev);
+	else
+		return handle_remove(name, dev);
+}
+
+static int devtmpfsd(void *p)
+{
+	char options[] = "mode=0755";
+	int *err = p;
+	*err = sys_unshare(CLONE_NEWNS);
+	if (*err)
+		goto out;
+	*err = sys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options);
+	if (*err)
+		goto out;
+	sys_chdir("/.."); /* will traverse into overmounted root */
+	sys_chroot(".");
+	complete(&setup_done);
+	while (1) {
+		spin_lock(&req_lock);
+		while (requests) {
+			struct req *req = requests;
+			requests = NULL;
+			spin_unlock(&req_lock);
+			while (req) {
+				req->err = handle(req->name, req->mode, req->dev);
+				complete(&req->done);
+				req = req->next;
+			}
+			spin_lock(&req_lock);
+		}
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock(&req_lock);
+		schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+	return 0;
+out:
+	complete(&setup_done);
+	return *err;
+}
+
 /*
 * Create devtmpfs instance, driver-core devices will add their device
 * nodes here.
 */
 int __init devtmpfs_init(void)
 {
-	int err;
-	struct vfsmount *mnt;
-	char options[] = "mode=0755";
-
-	err = register_filesystem(&dev_fs_type);
+	int err = register_filesystem(&dev_fs_type);
 	if (err) {
 		printk(KERN_ERR "devtmpfs: unable to register devtmpfs "
 		       "type %i\n", err);
 		return err;
 	}

-	mnt = kern_mount_data(&dev_fs_type, options);
-	if (IS_ERR(mnt)) {
-		err = PTR_ERR(mnt);
+	thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
+	if (!IS_ERR(thread)) {
+		wait_for_completion(&setup_done);
+	} else {
+		err = PTR_ERR(thread);
+		thread = NULL;
+	}
+
+	if (err) {
 		printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
 		unregister_filesystem(&dev_fs_type);
 		return err;
 	}
-	dev_mnt = mnt;

 	printk(KERN_INFO "devtmpfs: initialized\n");
 	return 0;
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@ -1206,7 +1206,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
 	if (!sb)
 		return 0;

-	if (!sb->s_op || !sb->s_op->relocate_blocks)
+	if (!sb->s_op->relocate_blocks)
 		goto out;

 	old_block = pkt->sector / (CD_FRAMESIZE >> 9);
--- a/drivers/char/generic_nvram.c
+++ b/drivers/char/generic_nvram.c
@ -34,12 +34,16 @@ static ssize_t nvram_len;
 static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
 {
 	switch (origin) {
+	case 0:
+		break;
 	case 1:
 		offset += file->f_pos;
 		break;
 	case 2:
 		offset += nvram_len;
 		break;
+	default:
+		offset = -1;
 	}
 	if (offset < 0)
 		return -EINVAL;
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@ -224,6 +224,8 @@ static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
 	case 2:
 		offset += NVRAM_BYTES;
 		break;
+	default:
+		return -EINVAL;
 	}

 	return (offset >= 0) ? (file->f_pos = offset) : -EINVAL;
--- a/drivers/char/ps3flash.c
+++ b/drivers/char/ps3flash.c
@ -101,12 +101,16 @@ static loff_t ps3flash_llseek(struct file *file, loff_t offset, int origin)

 	mutex_lock(&file->f_mapping->host->i_mutex);
 	switch (origin) {
+	case 0:
+		break;
 	case 1:
 		offset += file->f_pos;
 		break;
 	case 2:
 		offset += dev->regions[dev->region_idx].size*dev->blk_size;
 		break;
+	default:
+		offset = -1;
 	}
 	if (offset < 0) {
 		res = -EINVAL;
@ -305,9 +309,14 @@ static int ps3flash_flush(struct file *file, fl_owner_t id)
 	return ps3flash_writeback(ps3flash_dev);
 }

-static int ps3flash_fsync(struct file *file, int datasync)
+static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	return ps3flash_writeback(ps3flash_dev);
+	struct inode *inode = file->f_path.dentry->d_inode;
+	int err;
+	mutex_lock(&inode->i_mutex);
+	err = ps3flash_writeback(ps3flash_dev);
+	mutex_unlock(&inode->i_mutex);
+	return err;
 }

 static irqreturn_t ps3flash_interrupt(int irq, void *data)
--- a/drivers/macintosh/nvram.c
+++ b/drivers/macintosh/nvram.c
@ -21,12 +21,16 @@
 static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
 {
 	switch (origin) {
+	case 0:
+		break;
 	case 1:
 		offset += file->f_pos;
 		break;
 	case 2:
 		offset += NVRAM_SIZE;
 		break;
+	default:
+		offset = -1;
 	}
 	if (offset < 0)
 		return -EINVAL;
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@ -6394,16 +6394,11 @@ static void md_seq_stop(struct seq_file *seq, void *v)
 		mddev_put(mddev);
 }

-struct mdstat_info {
-	int event;
-};
-
 static int md_seq_show(struct seq_file *seq, void *v)
 {
 	mddev_t *mddev = v;
 	sector_t sectors;
 	mdk_rdev_t *rdev;
-	struct mdstat_info *mi = seq->private;
 	struct bitmap *bitmap;

 	if (v == (void*)1) {
@ -6415,7 +6410,7 @@ static int md_seq_show(struct seq_file *seq, void *v)

 		spin_unlock(&pers_lock);
 		seq_printf(seq, "\n");
-		mi->event = atomic_read(&md_event_count);
+		seq->poll_event = atomic_read(&md_event_count);
 		return 0;
 	}
 	if (v == (void*)2) {
@ -6527,26 +6522,21 @@ static const struct seq_operations md_seq_ops = {

 static int md_seq_open(struct inode *inode, struct file *file)
 {
+	struct seq_file *seq;
 	int error;
-	struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
-	if (mi == NULL)
-		return -ENOMEM;

 	error = seq_open(file, &md_seq_ops);
 	if (error)
-		kfree(mi);
-	else {
-		struct seq_file *p = file->private_data;
-		p->private = mi;
-		mi->event = atomic_read(&md_event_count);
-	}
+		return error;
+
+	seq = file->private_data;
+	seq->poll_event = atomic_read(&md_event_count);
 	return error;
 }

 static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
 {
-	struct seq_file *m = filp->private_data;
-	struct mdstat_info *mi = m->private;
+	struct seq_file *seq = filp->private_data;
 	int mask;

 	poll_wait(filp, &md_event_waiters, wait);
@ -6554,7 +6544,7 @@ static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
 	/* always allow read */
 	mask = POLLIN | POLLRDNORM;

-	if (mi->event != atomic_read(&md_event_count))
+	if (seq->poll_event != atomic_read(&md_event_count))
 		mask |= POLLERR | POLLPRI;
 	return mask;
 }
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@ -189,12 +189,16 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin)
 	return new_offset;
 }

-static int vol_cdev_fsync(struct file *file, int datasync)
+static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct ubi_volume_desc *desc = file->private_data;
 	struct ubi_device *ubi = desc->vol->ubi;
-
-	return ubi_sync(ubi->ubi_num);
+	struct inode *inode = file->f_path.dentry->d_inode;
+	int err;
+	mutex_lock(&inode->i_mutex);
+	err = ubi_sync(ubi->ubi_num);
+	mutex_unlock(&inode->i_mutex);
+	return err;
 }


--- a/drivers/sh/clk/core.c
+++ b/drivers/sh/clk/core.c
@ -670,7 +670,7 @@ static struct dentry *clk_debugfs_root;
 static int clk_debugfs_register_one(struct clk *c)
 {
 	int err;
-	struct dentry *d, *child, *child_tmp;
+	struct dentry *d;
 	struct clk *pa = c->parent;
 	char s[255];
 	char *p = s;
@ -699,10 +699,7 @@ static int clk_debugfs_register_one(struct clk *c)
 	return 0;

 err_out:
-	d = c->dentry;
-	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(c->dentry);
+	debugfs_remove_recursive(c->dentry);
 	return err;
 }

--- a/drivers/staging/pohmelfs/dir.c
+++ b/drivers/staging/pohmelfs/dir.c
@ -512,7 +512,7 @@ struct dentry *pohmelfs_lookup(struct inode *dir, struct dentry *dentry, struct
 	int err, lock_type = POHMELFS_READ_LOCK, need_lock = 1;
 	struct qstr str = dentry->d_name;

-	if ((nd->intent.open.flags & O_ACCMODE) > 1)
+	if ((nd->intent.open.flags & O_ACCMODE) != O_RDONLY)
 		lock_type = POHMELFS_WRITE_LOCK;

 	if (test_bit(NETFS_INODE_OWNED, &parent->state)) {
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@ -887,11 +887,16 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb)
 /*
 * We want fsync() to work on POHMELFS.
 */
-static int pohmelfs_fsync(struct file *file, int datasync)
+static int pohmelfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
-
-	return sync_inode_metadata(inode, 1);
+	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (!err) {
+		mutex_lock(&inode->i_mutex);
+		err = sync_inode_metadata(inode, 1);
+		mutex_unlock(&inode->i_mutex);
+	}
+	return err;
 }

 ssize_t pohmelfs_write(struct file *file, const char __user *buf,
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@ -795,12 +795,14 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
 }

 static int
-printer_fsync(struct file *fd, int datasync)
+printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
 {
 	struct printer_dev	*dev = fd->private_data;
+	struct inode *inode = fd->f_path.dentry->d_inode;
 	unsigned long		flags;
 	int			tx_list_empty;

+	mutex_lock(&inode->i_mutex);
 	spin_lock_irqsave(&dev->lock, flags);
 	tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
 	spin_unlock_irqrestore(&dev->lock, flags);
@ -810,6 +812,7 @@ printer_fsync(struct file *fd, int datasync)
 		wait_event_interruptible(dev->tx_flush_wait,
 				(likely(list_empty(&dev->tx_reqs_active))));
 	}
+	mutex_unlock(&inode->i_mutex);

 	return 0;
 }
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@ -66,19 +66,26 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma,
 	return 0;
 }

-int fb_deferred_io_fsync(struct file *file, int datasync)
+int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct fb_info *info = file->private_data;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (err)
+		return err;

 	/* Skip if deferred io is compiled-in but disabled on this fbdev */
 	if (!info->fbdefio)
 		return 0;

+	mutex_lock(&inode->i_mutex);
 	/* Kill off the delayed work */
 	cancel_delayed_work_sync(&info->deferred_work);

 	/* Run it immediately */
-	return schedule_delayed_work(&info->deferred_work, 0);
+	err = schedule_delayed_work(&info->deferred_work, 0);
+	mutex_unlock(&inode->i_mutex);
+	return err;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);

--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@ -96,12 +96,12 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
 	return acl;
 }

-int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
+int v9fs_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl;
 	struct v9fs_session_info *v9ses;

-	if (flags & IPERM_FLAG_RCU)
+	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;

 	v9ses = v9fs_inode2v9ses(inode);
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@ -16,7 +16,7 @@

 #ifdef CONFIG_9P_FS_POSIX_ACL
 extern int v9fs_get_acl(struct inode *, struct p9_fid *);
-extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
+extern int v9fs_check_acl(struct inode *inode, int mask);
 extern int v9fs_acl_chmod(struct dentry *);
 extern int v9fs_set_create_acl(struct dentry *,
 			       struct posix_acl *, struct posix_acl *);
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@ -70,7 +70,8 @@ ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
 ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
 void v9fs_blank_wstat(struct p9_wstat *wstat);
 int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
-int v9fs_file_fsync_dotl(struct file *filp, int datasync);
+int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
+			 int datasync);
 ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
 				 const char __user *, size_t, loff_t *, int);
 int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@ -519,32 +519,50 @@ v9fs_file_write(struct file *filp, const char __user * data,
 }


-static int v9fs_file_fsync(struct file *filp, int datasync)
+static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
+			   int datasync)
 {
 	struct p9_fid *fid;
+	struct inode *inode = filp->f_mapping->host;
 	struct p9_wstat wstat;
 	int retval;

+	retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (retval)
+		return retval;
+
+	mutex_lock(&inode->i_mutex);
 	P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);

 	fid = filp->private_data;
 	v9fs_blank_wstat(&wstat);

 	retval = p9_client_wstat(fid, &wstat);
+	mutex_unlock(&inode->i_mutex);
+
 	return retval;
 }

-int v9fs_file_fsync_dotl(struct file *filp, int datasync)
+int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
+			 int datasync)
 {
 	struct p9_fid *fid;
+	struct inode *inode = filp->f_mapping->host;
 	int retval;

+	retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (retval)
+		return retval;
+
+	mutex_lock(&inode->i_mutex);
 	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
 			filp, datasync);

 	fid = filp->private_data;

 	retval = p9_client_fsync(fid, datasync);
+	mutex_unlock(&inode->i_mutex);
+
 	return retval;
 }

--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@ -633,8 +633,8 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	fid = NULL;
 	v9ses = v9fs_inode2v9ses(dir);
 	perm = unixmode2p9mode(v9ses, mode);
-	if (nd && nd->flags & LOOKUP_OPEN)
-		flags = nd->intent.open.flags - 1;
+	if (nd)
+		flags = nd->intent.open.flags;
 	else
 		flags = O_RDWR;

@ -649,7 +649,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,

 	v9fs_invalidate_inode_attr(dir);
 	/* if we are opening a file, assign the open fid to the file */
-	if (nd && nd->flags & LOOKUP_OPEN) {
+	if (nd) {
 		v9inode = V9FS_I(dentry->d_inode);
 		mutex_lock(&v9inode->v_mutex);
 		if (v9ses->cache && !v9inode->writeback_fid &&
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@ -173,8 +173,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	struct posix_acl *pacl = NULL, *dacl = NULL;

 	v9ses = v9fs_inode2v9ses(dir);
-	if (nd && nd->flags & LOOKUP_OPEN)
-		flags = nd->intent.open.flags - 1;
+	if (nd)
+		flags = nd->intent.open.flags;
 	else {
 		/*
 		 * create call without LOOKUP_OPEN is due
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@ -182,7 +182,7 @@ extern int			 affs_add_entry(struct inode *dir, struct inode *inode, struct dent

 void		affs_free_prealloc(struct inode *inode);
 extern void	affs_truncate(struct inode *);
-int		affs_file_fsync(struct file *, int);
+int		affs_file_fsync(struct file *, loff_t, loff_t, int);

 /* dir.c */

--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@ -923,14 +923,20 @@ affs_truncate(struct inode *inode)
 	affs_free_prealloc(inode);
 }

-int affs_file_fsync(struct file *filp, int datasync)
+int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = filp->f_mapping->host;
 	int ret, err;

+	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (err)
+		return err;
+
+	mutex_lock(&inode->i_mutex);
 	ret = write_inode_now(inode, 0);
 	err = sync_blockdev(inode->i_sb->s_bdev);
 	if (!ret)
 		ret = err;
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@ -49,7 +49,7 @@ enum AFSVL_Errors {
 	AFSVL_BADVOLOPER 	= 363542,	/* Bad volume operation code */
 	AFSVL_BADRELLOCKTYPE 	= 363543,	/* Bad release lock type */
 	AFSVL_RERELEASE 	= 363544,	/* Status report: last release was aborted */
-	AFSVL_BADSERVERFLAG 	= 363545,	/* Invalid replication site server °ag */
+	AFSVL_BADSERVERFLAG 	= 363545,	/* Invalid replication site server flag */
 	AFSVL_PERM 		= 363546,	/* No permission access */
 	AFSVL_NOMEM 		= 363547,	/* malloc/realloc failed to alloc enough memory */
 };
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@ -627,7 +627,7 @@ extern void afs_clear_permits(struct afs_vnode *);
 extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int, unsigned int);
+extern int afs_permission(struct inode *, int);

 /*
 * server.c
@ -750,7 +750,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
 			      unsigned long, loff_t);
 extern int afs_writeback_all(struct afs_vnode *);
-extern int afs_fsync(struct file *, int);
+extern int afs_fsync(struct file *, loff_t, loff_t, int);


 /*****************************************************************************/
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@ -285,14 +285,14 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 * - AFS ACLs are attached to directories only, and a file is controlled by its
 *   parent directory's ACL
 */
-int afs_permission(struct inode *inode, int mask, unsigned int flags)
+int afs_permission(struct inode *inode, int mask)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_access_t uninitialized_var(access);
 	struct key *key;
 	int ret;

-	if (flags & IPERM_FLAG_RCU)
+	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;

 	_enter("{{%x:%u},%lx},%x,",
@ -350,7 +350,7 @@ int afs_permission(struct inode *inode, int mask, unsigned int flags)
 	}

 	key_put(key);
-	ret = generic_permission(inode, mask, flags, NULL);
+	ret = generic_permission(inode, mask);
 	_leave(" = %d", ret);
 	return ret;

--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@ -681,9 +681,10 @@ int afs_writeback_all(struct afs_vnode *vnode)
 * - the return status from this call provides a reliable indication of
 *   whether any write errors occurred for this process.
 */
-int afs_fsync(struct file *file, int datasync)
+int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = file->f_mapping->host;
 	struct afs_writeback *wb, *xwb;
 	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
 	int ret;
@ -692,12 +693,19 @@ int afs_fsync(struct file *file, int datasync)
 	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
 	       datasync);

+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+	mutex_lock(&inode->i_mutex);
+
 	/* use a writeback record as a marker in the queue - when this reaches
 	 * the front of the queue, all the outstanding writes are either
 	 * completed or rejected */
 	wb = kzalloc(sizeof(*wb), GFP_KERNEL);
-	if (!wb)
-		return -ENOMEM;
+	if (!wb) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	wb->vnode = vnode;
 	wb->first = 0;
 	wb->last = -1;
@ -720,7 +728,7 @@ int afs_fsync(struct file *file, int datasync)
 	if (ret < 0) {
 		afs_put_writeback(wb);
 		_leave(" = %d [wb]", ret);
-		return ret;
+		goto out;
 	}

 	/* wait for the preceding writes to actually complete */
@ -729,6 +737,8 @@ int afs_fsync(struct file *file, int datasync)
 				       vnode->writebacks.next == &wb->link);
 	afs_put_writeback(wb);
 	_leave(" = %d", ret);
+out:
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }

--- a/fs/attr.c
+++ b/fs/attr.c
@ -232,17 +232,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	if (error)
 		return error;

-	if (ia_valid & ATTR_SIZE)
-		down_write(&dentry->d_inode->i_alloc_sem);
-
 	if (inode->i_op->setattr)
 		error = inode->i_op->setattr(dentry, attr);
 	else
 		error = simple_setattr(dentry, attr);

-	if (ia_valid & ATTR_SIZE)
-		up_write(&dentry->d_inode->i_alloc_sem);
-
 	if (!error)
 		fsnotify_change(dentry, ia_valid);

--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@ -87,7 +87,8 @@ static int bad_file_release(struct inode *inode, struct file *filp)
 	return -EIO;
 }

-static int bad_file_fsync(struct file *file, int datasync)
+static int bad_file_fsync(struct file *file, loff_t start, loff_t end,
+			  int datasync)
 {
 	return -EIO;
 }
@ -229,7 +230,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
 	return -EIO;
 }

-static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
+static int bad_inode_permission(struct inode *inode, int mask)
 {
 	return -EIO;
 }
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@ -668,8 +668,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * mm->dumpable = 0 regardless of the interpreter's
 			 * permissions.
 			 */
-			if (file_permission(interpreter, MAY_READ) < 0)
-				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+			would_dump(bprm, interpreter);

 			retval = kernel_read(interpreter, 0, bprm->buf,
 					     BINPRM_BUF_SIZE);
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@ -245,8 +245,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 			 * mm->dumpable = 0 regardless of the interpreter's
 			 * permissions.
 			 */
-			if (file_permission(interpreter, MAY_READ) < 0)
-				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+			would_dump(bprm, interpreter);

 			retval = kernel_read(interpreter, 0, bprm->buf,
 					     BINPRM_BUF_SIZE);
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@ -149,8 +149,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)

 		/* if the binary is not readable than enforce mm->dumpable=0
 		   regardless of the interpreter's permissions */
-		if (file_permission(bprm->file, MAY_READ))
-			bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+		would_dump(bprm, bprm->file);

 		allow_write_access(bprm->file);
 		bprm->file = NULL;
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@ -355,25 +355,30 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 	mutex_lock(&bd_inode->i_mutex);
 	size = i_size_read(bd_inode);

+	retval = -EINVAL;
 	switch (origin) {
-		case 2:
+		case SEEK_END:
 			offset += size;
 			break;
-		case 1:
+		case SEEK_CUR:
 			offset += file->f_pos;
+		case SEEK_SET:
+			break;
+		default:
+			goto out;
 	}
-	retval = -EINVAL;
 	if (offset >= 0 && offset <= size) {
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 		}
 		retval = offset;
 	}
+out:
 	mutex_unlock(&bd_inode->i_mutex);
 	return retval;
 }
 	
-int blkdev_fsync(struct file *filp, int datasync)
+int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
 	struct inode *bd_inode = filp->f_mapping->host;
 	struct block_device *bdev = I_BDEV(bd_inode);
@ -384,14 +389,10 @@ int blkdev_fsync(struct file *filp, int datasync)
 	 * i_mutex and doing so causes performance issues with concurrent
 	 * O_SYNC writers to a block device.
 	 */
-	mutex_unlock(&bd_inode->i_mutex);
-
 	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
 	if (error == -EOPNOTSUPP)
 		error = 0;

-	mutex_lock(&bd_inode->i_mutex);
-
 	return error;
 }
 EXPORT_SYMBOL(blkdev_fsync);
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@ -195,14 +195,13 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	return ret;
 }

-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
+int btrfs_check_acl(struct inode *inode, int mask)
 {
 	int error = -EAGAIN;

-	if (flags & IPERM_FLAG_RCU) {
+	if (mask & MAY_NOT_BLOCK) {
 		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
 			error = -ECHILD;
-
 	} else {
 		struct posix_acl *acl;
 		acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@ -1219,7 +1219,7 @@ struct btrfs_root {
 	 * right now this just gets used so that a root has its own devid
 	 * for stat.  It may be used for more later
 	 */
-	struct super_block anon_super;
+	dev_t anon_dev;
 };

 struct btrfs_ioctl_defrag_range_args {
@ -2510,6 +2510,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
 /* inode.c */
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+					   size_t pg_offset, u64 start, u64 len,
+					   int create);

 /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
 #if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
@ -2602,7 +2605,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 			   struct inode *inode);
 int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
-int btrfs_sync_file(struct file *file, int datasync);
+int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			    int skip_pinned);
 extern const struct file_operations btrfs_file_operations;
@ -2642,7 +2645,7 @@ do {								\

 /* acl.c */
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
+int btrfs_check_acl(struct inode *inode, int mask);
 #else
 #define btrfs_check_acl NULL
 #endif
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@ -1077,12 +1077,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	init_completion(&root->kobj_unregister);
 	root->defrag_running = 0;
 	root->root_key.objectid = objectid;
-	root->anon_super.s_root = NULL;
-	root->anon_super.s_dev = 0;
-	INIT_LIST_HEAD(&root->anon_super.s_list);
-	INIT_LIST_HEAD(&root->anon_super.s_instances);
-	init_rwsem(&root->anon_super.s_umount);
-
+	root->anon_dev = 0;
 	return 0;
 }

@ -1311,7 +1306,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
 	spin_lock_init(&root->cache_lock);
 	init_waitqueue_head(&root->cache_wait);

-	ret = set_anon_super(&root->anon_super, NULL);
+	ret = get_anon_bdev(&root->anon_dev);
 	if (ret)
 		goto fail;

@ -2393,10 +2388,8 @@ static void free_fs_root(struct btrfs_root *root)
 {
 	iput(root->cache_inode);
 	WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
-	if (root->anon_super.s_dev) {
-		down_write(&root->anon_super.s_umount);
-		kill_anon_super(&root->anon_super);
-	}
+	if (root->anon_dev)
+		free_anon_bdev(root->anon_dev);
 	free_extent_buffer(root->node);
 	free_extent_buffer(root->commit_root);
 	kfree(root->free_ino_ctl);
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
-int btrfs_sync_file(struct file *file, int datasync)
+int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync)

 	trace_btrfs_sync_file(file, datasync);

+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+	mutex_lock(&inode->i_mutex);
+
 	/* we wait first, since the writeback may change the inode */
 	root->log_batch++;
-	/* the VFS called filemap_fdatawrite for us */
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 	root->log_batch++;

@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync)
 	 * check the transaction that last modified this inode
 	 * and see if its already been committed
 	 */
-	if (!BTRFS_I(inode)->last_trans)
+	if (!BTRFS_I(inode)->last_trans) {
+		mutex_unlock(&inode->i_mutex);
 		goto out;
+	}

 	/*
 	 * if the last transaction that changed this file was before
@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync)
 	if (BTRFS_I(inode)->last_trans <=
 	    root->fs_info->last_trans_committed) {
 		BTRFS_I(inode)->last_trans = 0;
+		mutex_unlock(&inode->i_mutex);
 		goto out;
 	}

@ -1496,12 +1503,15 @@ int btrfs_sync_file(struct file *file, int datasync)
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
+		mutex_unlock(&inode->i_mutex);
 		goto out;
 	}

 	ret = btrfs_log_dentry_safe(trans, root, dentry);
-	if (ret < 0)
+	if (ret < 0) {
+		mutex_unlock(&inode->i_mutex);
 		goto out;
+	}

 	/* we've logged all the items and now have a consistent
 	 * version of the file in the log.  It is possible that
@ -1513,7 +1523,7 @@ int btrfs_sync_file(struct file *file, int datasync)
 	 * file again, but that will end up using the synchronization
 	 * inside btrfs_sync_log to keep things safe.
 	 */
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	mutex_unlock(&inode->i_mutex);

 	if (ret != BTRFS_NO_LOG_SYNC) {
 		if (ret > 0) {
@ -1528,7 +1538,6 @@ int btrfs_sync_file(struct file *file, int datasync)
 	} else {
 		ret = btrfs_end_transaction(trans, root);
 	}
-	mutex_lock(&dentry->d_inode->i_mutex);
 out:
 	return ret > 0 ? -EIO : ret;
 }
@ -1664,8 +1673,154 @@ static long btrfs_fallocate(struct file *file, int mode,
 	return ret;
 }

+static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_map *em;
+	struct extent_state *cached_state = NULL;
+	u64 lockstart = *offset;
+	u64 lockend = i_size_read(inode);
+	u64 start = *offset;
+	u64 orig_start = *offset;
+	u64 len = i_size_read(inode);
+	u64 last_end = 0;
+	int ret = 0;
+
+	lockend = max_t(u64, root->sectorsize, lockend);
+	if (lockend <= lockstart)
+		lockend = lockstart + root->sectorsize;
+
+	len = lockend - lockstart + 1;
+
+	len = max_t(u64, len, root->sectorsize);
+	if (inode->i_size == 0)
+		return -ENXIO;
+
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+			 &cached_state, GFP_NOFS);
+
+	/*
+	 * Delalloc is such a pain.  If we have a hole and we have pending
+	 * delalloc for a portion of the hole we will get back a hole that
+	 * exists for the entire range since it hasn't been actually written
+	 * yet.  So to take care of this case we need to look for an extent just
+	 * before the position we want in case there is outstanding delalloc
+	 * going on here.
+	 */
+	if (origin == SEEK_HOLE && start != 0) {
+		if (start <= root->sectorsize)
+			em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
+						     root->sectorsize, 0);
+		else
+			em = btrfs_get_extent_fiemap(inode, NULL, 0,
+						     start - root->sectorsize,
+						     root->sectorsize, 0);
+		if (IS_ERR(em)) {
+			ret = -ENXIO;
+			goto out;
+		}
+		last_end = em->start + em->len;
+		if (em->block_start == EXTENT_MAP_DELALLOC)
+			last_end = min_t(u64, last_end, inode->i_size);
+		free_extent_map(em);
+	}
+
+	while (1) {
+		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
+		if (IS_ERR(em)) {
+			ret = -ENXIO;
+			break;
+		}
+
+		if (em->block_start == EXTENT_MAP_HOLE) {
+			if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+				if (last_end <= orig_start) {
+					free_extent_map(em);
+					ret = -ENXIO;
+					break;
+				}
+			}
+
+			if (origin == SEEK_HOLE) {
+				*offset = start;
+				free_extent_map(em);
+				break;
+			}
+		} else {
+			if (origin == SEEK_DATA) {
+				if (em->block_start == EXTENT_MAP_DELALLOC) {
+					if (start >= inode->i_size) {
+						free_extent_map(em);
+						ret = -ENXIO;
+						break;
+					}
+				}
+
+				*offset = start;
+				free_extent_map(em);
+				break;
+			}
+		}
+
+		start = em->start + em->len;
+		last_end = em->start + em->len;
+
+		if (em->block_start == EXTENT_MAP_DELALLOC)
+			last_end = min_t(u64, last_end, inode->i_size);
+
+		if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+			free_extent_map(em);
+			ret = -ENXIO;
+			break;
+		}
+		free_extent_map(em);
+		cond_resched();
+	}
+	if (!ret)
+		*offset = min(*offset, inode->i_size);
+out:
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			     &cached_state, GFP_NOFS);
+	return ret;
+}
+
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct inode *inode = file->f_mapping->host;
+	int ret;
+
+	mutex_lock(&inode->i_mutex);
+	switch (origin) {
+	case SEEK_END:
+	case SEEK_CUR:
+		offset = generic_file_llseek_unlocked(file, offset, origin);
+		goto out;
+	case SEEK_DATA:
+	case SEEK_HOLE:
+		ret = find_desired_extent(inode, &offset, origin);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
+	}
+
+	if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
+		return -EINVAL;
+	if (offset > inode->i_sb->s_maxbytes)
+		return -EINVAL;
+
+	/* Special lock needed here? */
+	if (offset != file->f_pos) {
+		file->f_pos = offset;
+		file->f_version = 0;
+	}
+out:
+	mutex_unlock(&inode->i_mutex);
+	return offset;
+}
+
 const struct file_operations btrfs_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= btrfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read       = generic_file_aio_read,
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@ -4079,13 +4079,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 				   struct nameidata *nd)
 {
-	struct inode *inode;
-
-	inode = btrfs_lookup_dentry(dir, dentry);
-	if (IS_ERR(inode))
-		return ERR_CAST(inode);
-
-	return d_splice_alias(inode, dentry);
+	return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
 }

 unsigned char btrfs_filetype_table[] = {
@ -4772,11 +4766,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	if (err) {
 		drop_inode = 1;
 	} else {
-		struct dentry *parent = dget_parent(dentry);
+		struct dentry *parent = dentry->d_parent;
 		err = btrfs_update_inode(trans, root, inode);
 		BUG_ON(err);
 		btrfs_log_new_name(trans, inode, NULL, parent);
-		dput(parent);
 	}

 	nr = trans->blocks_used;
@ -6900,7 +6893,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
 {
 	struct inode *inode = dentry->d_inode;
 	generic_fillattr(inode, stat);
-	stat->dev = BTRFS_I(inode)->root->anon_super.s_dev;
+	stat->dev = BTRFS_I(inode)->root->anon_dev;
 	stat->blksize = PAGE_CACHE_SIZE;
 	stat->blocks = (inode_get_bytes(inode) +
 			BTRFS_I(inode)->delalloc_bytes) >> 9;
@ -7068,9 +7061,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	BUG_ON(ret);

 	if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
-		struct dentry *parent = dget_parent(new_dentry);
+		struct dentry *parent = new_dentry->d_parent;
 		btrfs_log_new_name(trans, old_inode, old_dir, parent);
-		dput(parent);
 		btrfs_end_log_trans(root);
 	}
 out_fail:
@ -7331,7 +7323,7 @@ static int btrfs_set_page_dirty(struct page *page)
 	return __set_page_dirty_nobuffers(page);
 }

-static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
+static int btrfs_permission(struct inode *inode, int mask)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;

@ -7339,7 +7331,7 @@ static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
 		return -EROFS;
 	if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
 		return -EACCES;
-	return generic_permission(inode, mask, flags, btrfs_check_acl);
+	return generic_permission(inode, mask);
 }

 static const struct inode_operations btrfs_dir_inode_operations = {
@ -7359,10 +7351,12 @@ static const struct inode_operations btrfs_dir_inode_operations = {
 	.listxattr	= btrfs_listxattr,
 	.removexattr	= btrfs_removexattr,
 	.permission	= btrfs_permission,
+	.check_acl	= btrfs_check_acl,
 };
 static const struct inode_operations btrfs_dir_ro_inode_operations = {
 	.lookup		= btrfs_lookup,
 	.permission	= btrfs_permission,
+	.check_acl	= btrfs_check_acl,
 };

 static const struct file_operations btrfs_dir_file_operations = {
@ -7431,6 +7425,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
 	.removexattr	= btrfs_removexattr,
 	.permission	= btrfs_permission,
 	.fiemap		= btrfs_fiemap,
+	.check_acl	= btrfs_check_acl,
 };
 static const struct inode_operations btrfs_special_inode_operations = {
 	.getattr	= btrfs_getattr,
@ -7440,6 +7435,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
 	.getxattr	= btrfs_getxattr,
 	.listxattr	= btrfs_listxattr,
 	.removexattr	= btrfs_removexattr,
+	.check_acl	= btrfs_check_acl,
 };
 static const struct inode_operations btrfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
@ -7451,6 +7447,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
 	.getxattr	= btrfs_getxattr,
 	.listxattr	= btrfs_listxattr,
 	.removexattr	= btrfs_removexattr,
+	.check_acl	= btrfs_check_acl,
 };

 const struct dentry_operations btrfs_dentry_operations = {
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@ -323,7 +323,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 	struct btrfs_inode_item *inode_item;
 	struct extent_buffer *leaf;
 	struct btrfs_root *new_root;
-	struct dentry *parent = dget_parent(dentry);
+	struct dentry *parent = dentry->d_parent;
 	struct inode *dir;
 	int ret;
 	int err;
@ -332,10 +332,8 @@ static noinline int create_subvol(struct btrfs_root *root,
 	u64 index = 0;

 	ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
-	if (ret) {
-		dput(parent);
+	if (ret)
 		return ret;
-	}

 	dir = parent->d_inode;

@ -346,10 +344,8 @@ static noinline int create_subvol(struct btrfs_root *root,
 	 * 2 - dir items
 	 */
 	trans = btrfs_start_transaction(root, 6);
-	if (IS_ERR(trans)) {
-		dput(parent);
+	if (IS_ERR(trans))
 		return PTR_ERR(trans);
-	}

 	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
 				      0, objectid, NULL, 0, 0, 0);
@ -439,7 +435,6 @@ static noinline int create_subvol(struct btrfs_root *root,

 	d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
-	dput(parent);
 	if (async_transid) {
 		*async_transid = trans->transid;
 		err = btrfs_commit_transaction_async(trans, root, 1);
@ -456,7 +451,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 			   bool readonly)
 {
 	struct inode *inode;
-	struct dentry *parent;
 	struct btrfs_pending_snapshot *pending_snapshot;
 	struct btrfs_trans_handle *trans;
 	int ret;
@ -504,9 +498,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 	if (ret)
 		goto fail;

-	parent = dget_parent(dentry);
-	inode = btrfs_lookup_dentry(parent->d_inode, dentry);
-	dput(parent);
+	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
 		goto fail;
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@ -129,8 +129,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
 	    !root->d_inode->i_op->mkdir ||
 	    !root->d_inode->i_op->setxattr ||
 	    !root->d_inode->i_op->getxattr ||
-	    !root->d_sb ||
-	    !root->d_sb->s_op ||
 	    !root->d_sb->s_op->statfs ||
 	    !root->d_sb->s_op->sync_fs)
 		goto error_unsupported;
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@ -1811,7 +1811,7 @@ static void sync_write_wait(struct inode *inode)
 	spin_unlock(&ci->i_unsafe_lock);
 }

-int ceph_fsync(struct file *file, int datasync)
+int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	struct ceph_inode_info *ci = ceph_inode(inode);
@ -1822,9 +1822,10 @@ int ceph_fsync(struct file *file, int datasync)
 	dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
 	sync_write_wait(inode);

-	ret = filemap_write_and_wait(inode->i_mapping);
+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 	if (ret < 0)
 		return ret;
+	mutex_lock(&inode->i_mutex);

 	dirty = try_flush_caps(inode, NULL, &flush_tid);
 	dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@ -1841,6 +1842,7 @@ int ceph_fsync(struct file *file, int datasync)
 	}

 	dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }

--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@ -252,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		off = 1;
 	}
 	if (filp->f_pos == 1) {
-		ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
+		ino_t ino = parent_ino(filp->f_dentry);
 		dout("readdir off 1 -> '..'\n");
 		if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
 			    ceph_translate_ino(inode->i_sb, ino),
@ -446,14 +446,19 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 	loff_t retval;

 	mutex_lock(&inode->i_mutex);
+	retval = -EINVAL;
 	switch (origin) {
 	case SEEK_END:
 		offset += inode->i_size + 2;   /* FIXME */
 		break;
 	case SEEK_CUR:
 		offset += file->f_pos;
+	case SEEK_SET:
+		break;
+	default:
+		goto out;
 	}
-	retval = -EINVAL;
+
 	if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
@ -477,6 +482,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 		if (offset > old_offset)
 			fi->dir_release_count--;
 	}
+out:
 	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
@ -566,7 +572,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	/* open (but not create!) intent? */
 	if (nd &&
 	    (nd->flags & LOOKUP_OPEN) &&
-	    (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
 	    !(nd->intent.open.flags & O_CREAT)) {
 		int mode = nd->intent.open.create_mode & ~current->fs->umask;
 		return ceph_lookup_open(dir, dentry, nd, mode, 1);
@ -1113,7 +1118,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 * an fsync() on a dir will wait for any uncommitted directory
 * operations to commit.
 */
-static int ceph_dir_fsync(struct file *file, int datasync)
+static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
+			  int datasync)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
@ -1123,6 +1129,11 @@ static int ceph_dir_fsync(struct file *file, int datasync)
 	int ret = 0;

 	dout("dir_fsync %p\n", inode);
+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+	mutex_lock(&inode->i_mutex);
+
 	spin_lock(&ci->i_unsafe_lock);
 	if (list_empty(head))
 		goto out;
@ -1156,6 +1167,8 @@ static int ceph_dir_fsync(struct file *file, int datasync)
 	} while (req->r_tid < last_tid);
 out:
 	spin_unlock(&ci->i_unsafe_lock);
+	mutex_unlock(&inode->i_mutex);
+
 	return ret;
 }

--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@ -226,7 +226,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 	struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
 	struct ceph_mds_request *req;
 	int err;
-	int flags = nd->intent.open.flags - 1;  /* silly vfs! */
+	int flags = nd->intent.open.flags;

 	dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
 	     dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@ -768,13 +768,16 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)

 	mutex_lock(&inode->i_mutex);
 	__ceph_do_pending_vmtruncate(inode);
-	switch (origin) {
-	case SEEK_END:
+	if (origin != SEEK_CUR || origin != SEEK_SET) {
 		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
 		if (ret < 0) {
 			offset = ret;
 			goto out;
 		}
+	}
+
+	switch (origin) {
+	case SEEK_END:
 		offset += inode->i_size;
 		break;
 	case SEEK_CUR:
@ -790,6 +793,19 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
 		}
 		offset += file->f_pos;
 		break;
+	case SEEK_DATA:
+		if (offset >= inode->i_size) {
+			ret = -ENXIO;
+			goto out;
+		}
+		break;
+	case SEEK_HOLE:
+		if (offset >= inode->i_size) {
+			ret = -ENXIO;
+			goto out;
+		}
+		offset = inode->i_size;
+		break;
 	}

 	if (offset < 0 || offset > inode->i_sb->s_maxbytes) {
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@ -1795,17 +1795,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
 * Check inode permissions.  We verify we have a valid value for
 * the AUTH cap, then call the generic handler.
 */
-int ceph_permission(struct inode *inode, int mask, unsigned int flags)
+int ceph_permission(struct inode *inode, int mask)
 {
 	int err;

-	if (flags & IPERM_FLAG_RCU)
+	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;

 	err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);

 	if (!err)
-		err = generic_permission(inode, mask, flags, NULL);
+		err = generic_permission(inode, mask);
 	return err;
 }

--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@ -692,7 +692,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
 extern void ceph_queue_writeback(struct inode *inode);

 extern int ceph_do_getattr(struct inode *inode, int mask);
-extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
+extern int ceph_permission(struct inode *inode, int mask);
 extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
 extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
 			struct kstat *stat);
@ -728,7 +728,8 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc,

 extern void ceph_queue_caps_release(struct inode *inode);
 extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
-extern int ceph_fsync(struct file *file, int datasync);
+extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
+		      int datasync);
 extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
 				    struct ceph_mds_session *session);
 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@ -224,7 +224,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }

-static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
+static int cifs_permission(struct inode *inode, int mask)
 {
 	struct cifs_sb_info *cifs_sb;

@ -239,7 +239,7 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
 		on the client (above and beyond ACL on servers) for
 		servers which do not support setting and viewing mode bits,
 		so allowing client to check permissions is useful */
-		return generic_permission(inode, mask, flags, NULL);
+		return generic_permission(inode, mask);
 }

 static struct kmem_cache *cifs_inode_cachep;
@ -704,8 +704,11 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,

 static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 {
-	/* origin == SEEK_END => we must revalidate the cached file length */
-	if (origin == SEEK_END) {
+	/*
+	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
+	 * the cached file length
+	 */
+	if (origin != SEEK_SET || origin != SEEK_CUR) {
 		int rc;
 		struct inode *inode = file->f_path.dentry->d_inode;

--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@ -91,8 +91,8 @@ extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 				  unsigned long nr_segs, loff_t pos);
 extern int cifs_lock(struct file *, int, struct file_lock *);
-extern int cifs_fsync(struct file *, int);
-extern int cifs_strict_fsync(struct file *, int);
+extern int cifs_fsync(struct file *, loff_t, loff_t, int);
+extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
 extern int cifs_flush(struct file *, fl_owner_t id);
 extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
 extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *);
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@ -320,9 +320,10 @@ cifs_echo_request(struct work_struct *work)
 }

 static int
-cifs_demultiplex_thread(struct TCP_Server_Info *server)
+cifs_demultiplex_thread(void *p)
 {
 	int length;
+	struct TCP_Server_Info *server = p;
 	unsigned int pdu_length, total_read;
 	struct smb_hdr *smb_buffer = NULL;
 	struct smb_hdr *bigbuf = NULL;
@ -1791,7 +1792,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	 * this will succeed. No need for try_module_get().
 	 */
 	__module_get(THIS_MODULE);
-	tcp_ses->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread,
+	tcp_ses->tsk = kthread_run(cifs_demultiplex_thread,
 				  tcp_ses, "cifsd");
 	if (IS_ERR(tcp_ses->tsk)) {
 		rc = PTR_ERR(tcp_ses->tsk);
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@ -179,7 +179,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	if (oplockEnabled)
 		oplock = REQ_OPLOCK;

-	if (nd && (nd->flags & LOOKUP_OPEN))
+	if (nd)
 		oflags = nd->intent.open.file->f_flags;
 	else
 		oflags = O_RDONLY | O_CREAT;
@ -214,7 +214,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		   which should be rare for path not covered on files) */
 	}

-	if (nd && (nd->flags & LOOKUP_OPEN)) {
+	if (nd) {
 		/* if the file is going to stay open, then we
 		   need to set the desired access properly */
 		desiredAccess = 0;
@ -328,7 +328,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	else
 		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);

-	if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
+	if (newinode && nd) {
 		struct cifsFileInfo *pfile_info;
 		struct file *filp;

@ -568,7 +568,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	 * reduction in network traffic in the other paths.
 	 */
 	if (pTcon->unix_ext) {
-		if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
+		if (nd && !(nd->flags & LOOKUP_DIRECTORY) &&
 		     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
 		     (nd->intent.open.file->f_flags & O_CREAT)) {
 			rc = cifs_posix_open(full_path, &newInode,
@ -663,10 +663,8 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 	 * case sensitive name which is specified by user if this is
 	 * for creation.
 	 */
-	if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
 	if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
 		return 0;
-	}

 	if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
 		return 0;
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@ -1401,7 +1401,8 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
 	return rc;
 }

-int cifs_strict_fsync(struct file *file, int datasync)
+int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
+		      int datasync)
 {
 	int xid;
 	int rc = 0;
@ -1410,6 +1411,11 @@ int cifs_strict_fsync(struct file *file, int datasync)
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

+	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (rc)
+		return rc;
+	mutex_lock(&inode->i_mutex);
+
 	xid = GetXid();

 	cFYI(1, "Sync file - name: %s datasync: 0x%x",
@ -1428,16 +1434,23 @@ int cifs_strict_fsync(struct file *file, int datasync)
 		rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);

 	FreeXid(xid);
+	mutex_unlock(&inode->i_mutex);
 	return rc;
 }

-int cifs_fsync(struct file *file, int datasync)
+int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	int xid;
 	int rc = 0;
 	struct cifs_tcon *tcon;
 	struct cifsFileInfo *smbfile = file->private_data;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	struct inode *inode = file->f_mapping->host;
+
+	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (rc)
+		return rc;
+	mutex_lock(&inode->i_mutex);

 	xid = GetXid();

@ -1449,6 +1462,7 @@ int cifs_fsync(struct file *file, int datasync)
 		rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);

 	FreeXid(xid);
+	mutex_unlock(&inode->i_mutex);
 	return rc;
 }

--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@ -796,7 +796,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 		file->f_pos++;
 	case 1:
 		if (filldir(direntry, "..", 2, file->f_pos,
-		     file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
+		     parent_ino(file->f_path.dentry), DT_DIR) < 0) {
 			cERROR(1, "Filldir for parent dir failed");
 			rc = -ENOMEM;
 			break;
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@ -11,7 +11,7 @@ extern int coda_fake_statfs;

 void coda_destroy_inodecache(void);
 int coda_init_inodecache(void);
-int coda_fsync(struct file *coda_file, int datasync);
+int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync);
 void coda_sysctl_init(void);
 void coda_sysctl_clean(void);

--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@ -39,7 +39,7 @@ extern const struct file_operations coda_ioctl_operations;
 /* operations shared over more than one file */
 int coda_open(struct inode *i, struct file *f);
 int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask, unsigned int flags);
+int coda_permission(struct inode *inode, int mask);
 int coda_revalidate_inode(struct dentry *);
 int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 int coda_setattr(struct dentry *, struct iattr *);
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@ -132,11 +132,11 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
 }


-int coda_permission(struct inode *inode, int mask, unsigned int flags)
+int coda_permission(struct inode *inode, int mask)
 {
 	int error;

-	if (flags & IPERM_FLAG_RCU)
+	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;

 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
@ -449,8 +449,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
 	struct file *host_file;
 	struct dentry *de;
 	struct venus_dirent *vdir;
-	unsigned long vdir_size =
-	    (unsigned long)(&((struct venus_dirent *)0)->d_name);
+	unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
 	unsigned int type;
 	struct qstr name;
 	ino_t ino;
@ -474,7 +473,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
 		coda_file->f_pos++;
 	}
 	if (coda_file->f_pos == 1) {
-		ret = filldir(buf, "..", 2, 1, de->d_parent->d_inode->i_ino, DT_DIR);
+		ret = filldir(buf, "..", 2, 1, parent_ino(de), DT_DIR);
 		if (ret < 0)
 			goto out;
 		result++;
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@ -199,7 +199,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
 	return 0;
 }

-int coda_fsync(struct file *coda_file, int datasync)
+int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
 {
 	struct file *host_file;
 	struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
@ -210,6 +210,11 @@ int coda_fsync(struct file *coda_file, int datasync)
 	      S_ISLNK(coda_inode->i_mode)))
 		return -EINVAL;

+	err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
+	if (err)
+		return err;
+	mutex_lock(&coda_inode->i_mutex);
+
 	cfi = CODA_FTOC(coda_file);
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
@ -217,6 +222,7 @@ int coda_fsync(struct file *coda_file, int datasync)
 	err = vfs_fsync(host_file, datasync);
 	if (!err && !datasync)
 		err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
+	mutex_unlock(&coda_inode->i_mutex);

 	return err;
 }
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@ -24,7 +24,7 @@
 #include "coda_linux.h"

 /* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
+static int coda_ioctl_permission(struct inode *inode, int mask);
 static long coda_pioctl(struct file *filp, unsigned int cmd,
 			unsigned long user_data);

@ -41,7 +41,7 @@ const struct file_operations coda_ioctl_operations = {
 };

 /* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
+static int coda_ioctl_permission(struct inode *inode, int mask)
 {
 	return (mask & MAY_EXEC) ? -EACCES : 0;
 }
--- a/fs/dcache.c
+++ b/fs/dcache.c
@ -343,6 +343,24 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);

+/*
+ * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
+ * @dentry: dentry to drop
+ *
+ * This is called when we do a lookup on a placeholder dentry that needed to be
+ * looked up.  The dentry should have been hashed in order for it to be found by
+ * the lookup code, but now needs to be unhashed while we do the actual lookup
+ * and clear the DCACHE_NEED_LOOKUP flag.
+ */
+void d_clear_need_lookup(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	__d_drop(dentry);
+	dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+	spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(d_clear_need_lookup);
+
 /*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
@ -432,7 +450,12 @@ void dput(struct dentry *dentry)
 	if (d_unhashed(dentry))
 		goto kill_it;

-	/* Otherwise leave it cached and ensure it's on the LRU */
+	/*
+	 * If this dentry needs lookup, don't set the referenced flag so that it
+	 * is more likely to be cleaned up by the dcache shrinker in case of
+	 * memory pressure.
+	 */
+	if (!d_need_lookup(dentry))
 		dentry->d_flags |= DCACHE_REFERENCED;
 	dentry_lru_add(dentry);

@ -526,10 +549,6 @@ struct dentry *dget_parent(struct dentry *dentry)
 	 */
 	rcu_read_lock();
 	ret = dentry->d_parent;
-	if (!ret) {
-		rcu_read_unlock();
-		goto out;
-	}
 	spin_lock(&ret->d_lock);
 	if (unlikely(ret != dentry->d_parent)) {
 		spin_unlock(&ret->d_lock);
@ -540,7 +559,6 @@ struct dentry *dget_parent(struct dentry *dentry)
 	BUG_ON(!ret->d_count);
 	ret->d_count++;
 	spin_unlock(&ret->d_lock);
-out:
 	return ret;
 }
 EXPORT_SYMBOL(dget_parent);
@ -720,13 +738,11 @@ static void shrink_dentry_list(struct list_head *list)
 *
 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
 */
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
 {
-	/* called from prune_dcache() and shrink_dcache_parent() */
 	struct dentry *dentry;
 	LIST_HEAD(referenced);
 	LIST_HEAD(tmp);
-	int cnt = *count;

 relock:
 	spin_lock(&dcache_lru_lock);
@ -754,7 +770,7 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 		} else {
 			list_move_tail(&dentry->d_lru, &tmp);
 			spin_unlock(&dentry->d_lock);
-			if (!--cnt)
+			if (!--count)
 				break;
 		}
 		cond_resched_lock(&dcache_lru_lock);
@ -764,83 +780,22 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 	spin_unlock(&dcache_lru_lock);

 	shrink_dentry_list(&tmp);
-
-	*count = cnt;
 }

 /**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try to free
+ * prune_dcache_sb - shrink the dcache
+ * @nr_to_scan: number of entries to try to free
 *
- * Shrink the dcache. This is done when we need more memory, or simply when we
- * need to unmount something (at which point we need to unuse all dentries).
+ * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
+ * done when we need more memory an called from the superblock shrinker
+ * function.
 *
- * This function may fail to free any resources if all the dentries are in use.
+ * This function may fail to free any resources if all the dentries are in
+ * use.
 */
-static void prune_dcache(int count)
+void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
 {
-	struct super_block *sb, *p = NULL;
-	int w_count;
-	int unused = dentry_stat.nr_unused;
-	int prune_ratio;
-	int pruned;
-
-	if (unused == 0 || count == 0)
-		return;
-	if (count >= unused)
-		prune_ratio = 1;
-	else
-		prune_ratio = unused / count;
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		if (sb->s_nr_dentry_unused == 0)
-			continue;
-		sb->s_count++;
-		/* Now, we reclaim unused dentrins with fairness.
-		 * We reclaim them same percentage from each superblock.
-		 * We calculate number of dentries to scan on this sb
-		 * as follows, but the implementation is arranged to avoid
-		 * overflows:
-		 * number of dentries to scan on this sb =
-		 * count * (number of dentries on this sb /
-		 * number of dentries in the machine)
-		 */
-		spin_unlock(&sb_lock);
-		if (prune_ratio != 1)
-			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
-		else
-			w_count = sb->s_nr_dentry_unused;
-		pruned = w_count;
-		/*
-		 * We need to be sure this filesystem isn't being unmounted,
-		 * otherwise we could race with generic_shutdown_super(), and
-		 * end up holding a reference to an inode while the filesystem
-		 * is unmounted.  So we try to get s_umount, and make sure
-		 * s_root isn't NULL.
-		 */
-		if (down_read_trylock(&sb->s_umount)) {
-			if ((sb->s_root != NULL) &&
-			    (!list_empty(&sb->s_dentry_lru))) {
-				__shrink_dcache_sb(sb, &w_count,
-						DCACHE_REFERENCED);
-				pruned -= w_count;
-			}
-			up_read(&sb->s_umount);
-		}
-		spin_lock(&sb_lock);
-		if (p)
-			__put_super(p);
-		count -= pruned;
-		p = sb;
-		/* more work left to do? */
-		if (count <= 0)
-			break;
-	}
-	if (p)
-		__put_super(p);
-	spin_unlock(&sb_lock);
+	__shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
 }

 /**
@ -1215,45 +1170,13 @@ void shrink_dcache_parent(struct dentry * parent)
 	int found;

 	while ((found = select_parent(parent)) != 0)
-		__shrink_dcache_sb(sb, &found, 0);
+		__shrink_dcache_sb(sb, found, 0);
 }
 EXPORT_SYMBOL(shrink_dcache_parent);

-/*
- * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
- *
- * We need to avoid reentering the filesystem if the caller is performing a
- * GFP_NOFS allocation attempt.  One example deadlock is:
- *
- * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
- * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
- * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
- *
- * In this case we return -1 to tell the caller that we baled.
- */
-static int shrink_dcache_memory(struct shrinker *shrink,
-				struct shrink_control *sc)
-{
-	int nr = sc->nr_to_scan;
-	gfp_t gfp_mask = sc->gfp_mask;
-
-	if (nr) {
-		if (!(gfp_mask & __GFP_FS))
-			return -1;
-		prune_dcache(nr);
-	}
-
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker dcache_shrinker = {
-	.shrink = shrink_dcache_memory,
-	.seeks = DEFAULT_SEEKS,
-};
-
 /**
- * d_alloc	-	allocate a dcache entry
- * @parent: parent of entry to allocate
+ * __d_alloc	-	allocate a dcache entry
+ * @sb: filesystem it will belong to
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
@ -1261,7 +1184,7 @@ static struct shrinker dcache_shrinker = {
 * copied and the copy passed in may be reused after this call.
 */
 
-struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
+struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 {
 	struct dentry *dentry;
 	char *dname;
@ -1291,8 +1214,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	spin_lock_init(&dentry->d_lock);
 	seqcount_init(&dentry->d_seq);
 	dentry->d_inode = NULL;
-	dentry->d_parent = NULL;
-	dentry->d_sb = NULL;
+	dentry->d_parent = dentry;
+	dentry->d_sb = sb;
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	INIT_HLIST_BL_NODE(&dentry->d_hash);
@ -1300,8 +1223,28 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	INIT_LIST_HEAD(&dentry->d_subdirs);
 	INIT_LIST_HEAD(&dentry->d_alias);
 	INIT_LIST_HEAD(&dentry->d_u.d_child);
+	d_set_d_op(dentry, dentry->d_sb->s_d_op);
+
+	this_cpu_inc(nr_dentry);
+
+	return dentry;
+}
+
+/**
+ * d_alloc	-	allocate a dcache entry
+ * @parent: parent of entry to allocate
+ * @name: qstr of the name
+ *
+ * Allocates a dentry. It returns %NULL if there is insufficient memory
+ * available. On a success the dentry is returned. The name passed in is
+ * copied and the copy passed in may be reused after this call.
+ */
+struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
+{
+	struct dentry *dentry = __d_alloc(parent->d_sb, name);
+	if (!dentry)
+		return NULL;

-	if (parent) {
 	spin_lock(&parent->d_lock);
 	/*
 	 * don't need child lock because it is not subject
@ -1309,13 +1252,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	 */
 	__dget_dlock(parent);
 	dentry->d_parent = parent;
-		dentry->d_sb = parent->d_sb;
-		d_set_d_op(dentry, dentry->d_sb->s_d_op);
 	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 	spin_unlock(&parent->d_lock);
-	}
-
-	this_cpu_inc(nr_dentry);

 	return dentry;
 }
@ -1323,13 +1261,9 @@ EXPORT_SYMBOL(d_alloc);

 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
 {
-	struct dentry *dentry = d_alloc(NULL, name);
-	if (dentry) {
-		dentry->d_sb = sb;
-		d_set_d_op(dentry, dentry->d_sb->s_d_op);
-		dentry->d_parent = dentry;
+	struct dentry *dentry = __d_alloc(sb, name);
+	if (dentry)
 		dentry->d_flags |= DCACHE_DISCONNECTED;
-	}
 	return dentry;
 }
 EXPORT_SYMBOL(d_alloc_pseudo);
@ -1499,14 +1433,10 @@ struct dentry * d_alloc_root(struct inode * root_inode)
 	if (root_inode) {
 		static const struct qstr name = { .name = "/", .len = 1 };

-		res = d_alloc(NULL, &name);
-		if (res) {
-			res->d_sb = root_inode->i_sb;
-			d_set_d_op(res, res->d_sb->s_d_op);
-			res->d_parent = res;
+		res = __d_alloc(root_inode->i_sb, &name);
+		if (res)
 			d_instantiate(res, root_inode);
 	}
-	}
 	return res;
 }
 EXPORT_SYMBOL(d_alloc_root);
@ -1566,13 +1496,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	if (res)
 		goto out_iput;

-	tmp = d_alloc(NULL, &anonstring);
+	tmp = __d_alloc(inode->i_sb, &anonstring);
 	if (!tmp) {
 		res = ERR_PTR(-ENOMEM);
 		goto out_iput;
 	}
-	tmp->d_parent = tmp; /* make sure dput doesn't croak */
-

 	spin_lock(&inode->i_lock);
 	res = __d_find_any_alias(inode);
@ -1584,8 +1512,6 @@ struct dentry *d_obtain_alias(struct inode *inode)

 	/* attach a disconnected dentry */
 	spin_lock(&tmp->d_lock);
-	tmp->d_sb = inode->i_sb;
-	d_set_d_op(tmp, tmp->d_sb->s_d_op);
 	tmp->d_inode = inode;
 	tmp->d_flags |= DCACHE_DISCONNECTED;
 	list_add(&tmp->d_alias, &inode->i_dentry);
@ -1626,6 +1552,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 {
 	struct dentry *new = NULL;

+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
 	if (inode && S_ISDIR(inode->i_mode)) {
 		spin_lock(&inode->i_lock);
 		new = __d_find_alias(inode, 1);
@ -1707,30 +1636,23 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 		return found;
 	}

+	/*
+	 * We are going to instantiate this dentry, unhash it and clear the
+	 * lookup flag so we can do that.
+	 */
+	if (unlikely(d_need_lookup(found)))
+		d_clear_need_lookup(found);
+
 	/*
 	 * Negative dentry: instantiate it unless the inode is a directory and
 	 * already has a dentry.
 	 */
-	spin_lock(&inode->i_lock);
-	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
-		__d_instantiate(found, inode);
-		spin_unlock(&inode->i_lock);
-		security_d_instantiate(found, inode);
-		return found;
-	}
-
-	/*
-	 * In case a directory already has a (disconnected) entry grab a
-	 * reference to it, move it in place and use it.
-	 */
-	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
-	__dget(new);
-	spin_unlock(&inode->i_lock);
-	security_d_instantiate(found, inode);
-	d_move(new, found);
-	iput(inode);
+	new = d_splice_alias(inode, found);
+	if (new) {
 		dput(found);
-	return new;
+		found = new;
+	}
+	return found;

 err_out:
 	iput(inode);
@ -3046,8 +2968,6 @@ static void __init dcache_init(void)
 	dentry_cache = KMEM_CACHE(dentry,
 		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

-	register_shrinker(&dcache_shrinker);
-
 	/* Hash may have been set up in dcache_init_early */
 	if (!hashdist)
 		return;
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@ -135,6 +135,50 @@ struct dio {
 	struct page *pages[DIO_PAGES];	/* page buffer */
 };

+static void __inode_dio_wait(struct inode *inode)
+{
+	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
+	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
+
+	do {
+		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(&inode->i_dio_count))
+			schedule();
+	} while (atomic_read(&inode->i_dio_count));
+	finish_wait(wq, &q.wait);
+}
+
+/**
+ * inode_dio_wait - wait for outstanding DIO requests to finish
+ * @inode: inode to wait for
+ *
+ * Waits for all pending direct I/O requests to finish so that we can
+ * proceed with a truncate or equivalent operation.
+ *
+ * Must be called under a lock that serializes taking new references
+ * to i_dio_count, usually by inode->i_mutex.
+ */
+void inode_dio_wait(struct inode *inode)
+{
+	if (atomic_read(&inode->i_dio_count))
+		__inode_dio_wait(inode);
+}
+EXPORT_SYMBOL_GPL(inode_dio_wait);
+
+/*
+ * inode_dio_done - signal finish of a direct I/O requests
+ * @inode: inode the direct I/O happens on
+ *
+ * This is called once we've finished processing a direct I/O request,
+ * and is used to wake up callers waiting for direct I/O to be quiesced.
+ */
+void inode_dio_done(struct inode *inode)
+{
+	if (atomic_dec_and_test(&inode->i_dio_count))
+		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+}
+EXPORT_SYMBOL_GPL(inode_dio_done);
+
 /*
 * How many pages are in the queue?
 */
@ -249,14 +293,12 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
 	if (dio->end_io && dio->result) {
 		dio->end_io(dio->iocb, offset, transferred,
 			    dio->map_bh.b_private, ret, is_async);
-	} else if (is_async) {
+	} else {
+		if (is_async)
 			aio_complete(dio->iocb, ret, 0);
+		inode_dio_done(dio->inode);
 	}

-	if (dio->flags & DIO_LOCKING)
-		/* lockdep: non-owner release */
-		up_read_non_owner(&dio->inode->i_alloc_sem);
-
 	return ret;
 }

@ -980,9 +1022,6 @@ static int do_direct_IO(struct dio *dio)
 	return ret;
 }

-/*
- * Releases both i_mutex and i_alloc_sem
- */
 static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
 	const struct iovec *iov, loff_t offset, unsigned long nr_segs, 
@ -1146,15 +1185,16 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 *    For writes this function is called under i_mutex and returns with
 *    i_mutex held, for reads, i_mutex is not held on entry, but it is
 *    taken and dropped again before returning.
- *    For reads and writes i_alloc_sem is taken in shared mode and released
- *    on I/O completion (which may happen asynchronously after returning to
- *    the caller).
- *
 *  - if the flags value does NOT contain DIO_LOCKING we don't use any
 *    internal locking but rather rely on the filesystem to synchronize
 *    direct I/O reads/writes versus each other and truncate.
- *    For reads and writes both i_mutex and i_alloc_sem are not held on
- *    entry and are never taken.
+ *
+ * To help with locking against truncate we incremented the i_dio_count
+ * counter before starting direct I/O, and decrement it once we are done.
+ * Truncate can wait for it to reach zero to provide exclusion.  It is
+ * expected that filesystem provide exclusion between new direct I/O
+ * and truncates.  For DIO_LOCKING filesystems this is done by i_mutex,
+ * but other filesystems need to take care of this on their own.
 */
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
@ -1200,6 +1240,10 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		}
 	}

+	/* watch out for a 0 len io from a tricksy fs */
+	if (rw == READ && end == offset)
+		return 0;
+
 	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
 	retval = -ENOMEM;
 	if (!dio)
@ -1213,8 +1257,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,

 	dio->flags = flags;
 	if (dio->flags & DIO_LOCKING) {
-		/* watch out for a 0 len io from a tricksy fs */
-		if (rw == READ && end > offset) {
+		if (rw == READ) {
 			struct address_space *mapping =
 					iocb->ki_filp->f_mapping;

@ -1229,13 +1272,12 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 				goto out;
 			}
 		}
+	}

 	/*
-		 * Will be released at I/O completion, possibly in a
-		 * different thread.
+	 * Will be decremented at I/O completion time.
 	 */
-		down_read_non_owner(&inode->i_alloc_sem);
-	}
+	atomic_inc(&inode->i_dio_count);

 	/*
 	 * For file extending writes updating i_size before data
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@ -270,14 +270,15 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 }

 static int
-ecryptfs_fsync(struct file *file, int datasync)
+ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	int rc = 0;

-	rc = generic_file_fsync(file, datasync);
+	rc = generic_file_fsync(file, start, end, datasync);
 	if (rc)
 		goto out;
-	rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync);
+	rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end,
+			     datasync);
 out:
 	return rc;
 }
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@ -147,7 +147,6 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
 * @lower_dir_inode: inode of the parent in the lower fs of the new file
 * @dentry: New file's dentry
 * @mode: The mode of the new file
- * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
 *
 * Creates the file in the lower file system.
 *
@ -155,31 +154,10 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
 */
 static int
 ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
-				struct dentry *dentry, int mode,
-				struct nameidata *nd)
+				struct dentry *dentry, int mode)
 {
 	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
-	struct dentry *dentry_save;
-	struct vfsmount *vfsmount_save;
-	unsigned int flags_save;
-	int rc;
-
-	if (nd) {
-		dentry_save = nd->path.dentry;
-		vfsmount_save = nd->path.mnt;
-		flags_save = nd->flags;
-		nd->path.dentry = lower_dentry;
-		nd->path.mnt = lower_mnt;
-		nd->flags &= ~LOOKUP_OPEN;
-	}
-	rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
-	if (nd) {
-		nd->path.dentry = dentry_save;
-		nd->path.mnt = vfsmount_save;
-		nd->flags = flags_save;
-	}
-	return rc;
+	return vfs_create(lower_dir_inode, lower_dentry, mode, NULL);
 }

 /**
@ -197,8 +175,7 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
 */
 static int
 ecryptfs_do_create(struct inode *directory_inode,
-		   struct dentry *ecryptfs_dentry, int mode,
-		   struct nameidata *nd)
+		   struct dentry *ecryptfs_dentry, int mode)
 {
 	int rc;
 	struct dentry *lower_dentry;
@ -213,7 +190,7 @@ ecryptfs_do_create(struct inode *directory_inode,
 		goto out;
 	}
 	rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
-					     ecryptfs_dentry, mode, nd);
+					     ecryptfs_dentry, mode);
 	if (rc) {
 		printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
 		       "rc = [%d]\n", __func__, rc);
@ -294,7 +271,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
 	int rc;

 	/* ecryptfs_do_create() calls ecryptfs_interpose() */
-	rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
+	rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode);
 	if (unlikely(rc)) {
 		ecryptfs_printk(KERN_WARNING, "Failed to create file in"
 				"lower filesystem\n");
@ -942,10 +919,8 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 }

 static int
-ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
+ecryptfs_permission(struct inode *inode, int mask)
 {
-	if (flags & IPERM_FLAG_RCU)
-		return -ECHILD;
 	return inode_permission(ecryptfs_inode_to_lower(inode), mask);
 }

--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@ -63,11 +63,8 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
 	struct inode *inode = NULL;

 	inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
-	if (inodenum) {
+	if (inodenum)
 		inode = efs_iget(dir->i_sb, inodenum);
-		if (IS_ERR(inode))
-			return ERR_CAST(inode);
-	}

 	return d_splice_alias(inode, dentry);
 }
--- a/fs/exec.c
+++ b/fs/exec.c
@ -1114,6 +1114,13 @@ int flush_old_exec(struct linux_binprm * bprm)
 }
 EXPORT_SYMBOL(flush_old_exec);

+void would_dump(struct linux_binprm *bprm, struct file *file)
+{
+	if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+}
+EXPORT_SYMBOL(would_dump);
+
 void setup_new_exec(struct linux_binprm * bprm)
 {
 	int i, ch;
@ -1153,8 +1160,9 @@ void setup_new_exec(struct linux_binprm * bprm)
 	if (bprm->cred->uid != current_euid() ||
 	    bprm->cred->gid != current_egid()) {
 		current->pdeath_signal = 0;
-	} else if (file_permission(bprm->file, MAY_READ) ||
-		   bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
+	} else {
+		would_dump(bprm, bprm->file);
+		if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
 			set_dumpable(current->mm, suid_dumpable);
 	}

--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@ -42,11 +42,19 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
 *   Note, in exofs all metadata is written as part of inode, regardless.
 *   The writeout is synchronous
 */
-static int exofs_file_fsync(struct file *filp, int datasync)
+static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
+			    int datasync)
 {
+	struct inode *inode = filp->f_mapping->host;
 	int ret;

+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+
+	mutex_lock(&inode->i_mutex);
 	ret = sync_inode_metadata(filp->f_mapping->host, 1);
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }

--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@ -55,12 +55,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(-ENAMETOOLONG);

 	ino = exofs_inode_by_name(dir, dentry);
-	inode = NULL;
-	if (ino) {
-		inode = exofs_iget(dir->i_sb, ino);
-		if (IS_ERR(inode))
-			return ERR_CAST(inode);
-	}
+	inode = ino ? exofs_iget(dir->i_sb, ino) : NULL;
 	return d_splice_alias(inode, dentry);
 }

--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@ -232,11 +232,11 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 }

 int
-ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
+ext2_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl;

-	if (flags & IPERM_FLAG_RCU) {
+	if (mask & MAY_NOT_BLOCK) {
 		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
 			return -ECHILD;
 		return -EAGAIN;
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
 #ifdef CONFIG_EXT2_FS_POSIX_ACL

 /* acl.c */
-extern int ext2_check_acl (struct inode *, int, unsigned int);
+extern int ext2_check_acl (struct inode *, int);
 extern int ext2_acl_chmod (struct inode *);
 extern int ext2_init_acl (struct inode *, struct inode *);

--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@ -150,7 +150,8 @@ extern void ext2_write_super (struct super_block *);
 extern const struct file_operations ext2_dir_operations;

 /* file.c */
-extern int ext2_fsync(struct file *file, int datasync);
+extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
+		      int datasync);
 extern const struct inode_operations ext2_file_inode_operations;
 extern const struct file_operations ext2_file_operations;
 extern const struct file_operations ext2_xip_file_operations;
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
 	return 0;
 }

-int ext2_fsync(struct file *file, int datasync)
+int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	int ret;
 	struct super_block *sb = file->f_mapping->host->i_sb;
 	struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;

-	ret = generic_file_fsync(file, datasync);
+	ret = generic_file_fsync(file, start, end, datasync);
 	if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
 		/* We don't really know where the IO error happened... */
 		ext2_error(sb, __func__,
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@ -843,8 +843,8 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct inode *inode = mapping->host;
 	ssize_t ret;

-	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				iov, offset, nr_segs, ext2_get_block, NULL);
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				 ext2_get_block);
 	if (ret < 0 && (rw & WRITE))
 		ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
 	return ret;
@ -1184,6 +1184,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 		return -EPERM;

+	inode_dio_wait(inode);
+
 	if (mapping_is_xip(inode->i_mapping))
 		error = xip_truncate_page(inode->i_mapping, newsize);
 	else if (test_opt(inode->i_sb, NOBH))
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@ -67,15 +67,11 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
 	inode = NULL;
 	if (ino) {
 		inode = ext2_iget(dir->i_sb, ino);
-		if (IS_ERR(inode)) {
-			if (PTR_ERR(inode) == -ESTALE) {
+		if (inode == ERR_PTR(-ESTALE)) {
 			ext2_error(dir->i_sb, __func__,
 					"deleted inode referenced: %lu",
 					(unsigned long) ino);
 			return ERR_PTR(-EIO);
-			} else {
-				return ERR_CAST(inode);
-			}
 		}
 	}
 	return d_splice_alias(inode, dentry);
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@ -240,11 +240,11 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 }

 int
-ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
+ext3_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl;

-	if (flags & IPERM_FLAG_RCU) {
+	if (mask & MAY_NOT_BLOCK) {
 		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
 			return -ECHILD;
 		return -EAGAIN;
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
 #ifdef CONFIG_EXT3_FS_POSIX_ACL

 /* acl.c */
-extern int ext3_check_acl (struct inode *, int, unsigned int);
+extern int ext3_check_acl (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);

--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@ -43,7 +43,7 @@
 * inode to disk.
 */

-int ext3_sync_file(struct file *file, int datasync)
+int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	struct ext3_inode_info *ei = EXT3_I(inode);
@ -54,6 +54,17 @@ int ext3_sync_file(struct file *file, int datasync)
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return 0;

+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+
+	/*
+	 * Taking the mutex here just to keep consistent with how fsync was
+	 * called previously, however it looks like we don't need to take
+	 * i_mutex at all.
+	 */
+	mutex_lock(&inode->i_mutex);
+
 	J_ASSERT(ext3_journal_current_handle() == NULL);

 	/*
@ -70,8 +81,10 @@ int ext3_sync_file(struct file *file, int datasync)
 	 *  (they were dirtied by commit).  But that's OK - the blocks are
 	 *  safe in-journal, which is all fsync() needs to ensure.
 	 */
-	if (ext3_should_journal_data(inode))
+	if (ext3_should_journal_data(inode)) {
+		mutex_unlock(&inode->i_mutex);
 		return ext3_force_commit(inode->i_sb);
+	}

 	if (datasync)
 		commit_tid = atomic_read(&ei->i_datasync_tid);
@ -91,5 +104,6 @@ int ext3_sync_file(struct file *file, int datasync)
 	 */
 	if (needs_barrier)
 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@ -1816,9 +1816,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	}

 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				 offset, nr_segs,
-				 ext3_get_block, NULL);
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				 ext3_get_block);
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
@ -3216,6 +3215,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 		ext3_journal_stop(handle);
 	}

+	if (attr->ia_valid & ATTR_SIZE)
+		inode_dio_wait(inode);
+
 	if (S_ISREG(inode->i_mode) &&
 	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
 		handle_t *handle;
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@ -1038,15 +1038,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 			return ERR_PTR(-EIO);
 		}
 		inode = ext3_iget(dir->i_sb, ino);
-		if (IS_ERR(inode)) {
-			if (PTR_ERR(inode) == -ESTALE) {
+		if (inode == ERR_PTR(-ESTALE)) {
 			ext3_error(dir->i_sb, __func__,
 					"deleted inode referenced: %lu",
 					ino);
 			return ERR_PTR(-EIO);
-			} else {
-				return ERR_CAST(inode);
-			}
 		}
 	}
 	return d_splice_alias(inode, dentry);
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@ -1718,6 +1718,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);

+	/* enable barriers by default */
+	set_opt(sbi->s_mount_opt, BARRIER);
 	set_opt(sbi->s_mount_opt, RESERVATION);

 	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@ -238,11 +238,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 }

 int
-ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
+ext4_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl;

-	if (flags & IPERM_FLAG_RCU) {
+	if (mask & MAY_NOT_BLOCK) {
 		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
 			return -ECHILD;
 		return -EAGAIN;
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
 #ifdef CONFIG_EXT4_FS_POSIX_ACL

 /* acl.c */
-extern int ext4_check_acl(struct inode *, int, unsigned int);
+extern int ext4_check_acl(struct inode *, int);
 extern int ext4_acl_chmod(struct inode *);
 extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);

--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@ -1758,7 +1758,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 extern void ext4_htree_free_dir_info(struct dir_private_info *p);

 /* fsync.c */
-extern int ext4_sync_file(struct file *, int);
+extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
 extern int ext4_flush_completed_IO(struct inode *);

 /* hash.c */
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@ -236,6 +236,27 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
 		}
 		offset += file->f_pos;
 		break;
+	case SEEK_DATA:
+		/*
+		 * In the generic case the entire file is data, so as long as
+		 * offset isn't at the end of the file then the offset is data.
+		 */
+		if (offset >= inode->i_size) {
+			mutex_unlock(&inode->i_mutex);
+			return -ENXIO;
+		}
+		break;
+	case SEEK_HOLE:
+		/*
+		 * There is a virtual hole at the end of the file, so as long as
+		 * offset isn't i_size or larger, return i_size.
+		 */
+		if (offset >= inode->i_size) {
+			mutex_unlock(&inode->i_mutex);
+			return -ENXIO;
+		}
+		offset = inode->i_size;
+		break;
 	}

 	if (offset < 0 || offset > maxbytes) {
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@ -151,6 +151,32 @@ static int ext4_sync_parent(struct inode *inode)
 	return ret;
 }

+/**
+ * __sync_file - generic_file_fsync without the locking and filemap_write
+ * @inode:	inode to sync
+ * @datasync:	only sync essential metadata if true
+ *
+ * This is just generic_file_fsync without the locking.  This is needed for
+ * nojournal mode to make sure this inodes data/metadata makes it to disk
+ * properly.  The i_mutex should be held already.
+ */
+static int __sync_inode(struct inode *inode, int datasync)
+{
+	int err;
+	int ret;
+
+	ret = sync_mapping_buffers(inode->i_mapping);
+	if (!(inode->i_state & I_DIRTY))
+		return ret;
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		return ret;
+
+	err = sync_inode_metadata(inode, 1);
+	if (ret == 0)
+		ret = err;
+	return ret;
+}
+
 /*
 * akpm: A new design for ext4_sync_file().
 *
@ -165,7 +191,7 @@ static int ext4_sync_parent(struct inode *inode)
 * i_mutex lock is held when entering and exiting this function
 */

-int ext4_sync_file(struct file *file, int datasync)
+int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	struct ext4_inode_info *ei = EXT4_I(inode);
@ -178,15 +204,20 @@ int ext4_sync_file(struct file *file, int datasync)

 	trace_ext4_sync_file_enter(file, datasync);

+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+	mutex_lock(&inode->i_mutex);
+
 	if (inode->i_sb->s_flags & MS_RDONLY)
-		return 0;
+		goto out;

 	ret = ext4_flush_completed_IO(inode);
 	if (ret < 0)
 		goto out;

 	if (!journal) {
-		ret = generic_file_fsync(file, datasync);
+		ret = __sync_inode(inode, datasync);
 		if (!ret && !list_empty(&inode->i_dentry))
 			ret = ext4_sync_parent(inode);
 		goto out;
@ -220,6 +251,7 @@ int ext4_sync_file(struct file *file, int datasync)
 	if (needs_barrier)
 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 out:
+	mutex_unlock(&inode->i_mutex);
 	trace_ext4_sync_file_exit(inode, ret);
 	return ret;
 }
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@ -3501,10 +3501,8 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 				 offset, nr_segs,
 				 ext4_get_block, NULL, NULL, 0);
 	else {
-		ret = blockdev_direct_IO(rw, iocb, inode,
-				 inode->i_sb->s_bdev, iov,
-				 offset, nr_segs,
-				 ext4_get_block, NULL);
+		ret = blockdev_direct_IO(rw, iocb, inode, iov,
+				 offset, nr_segs, ext4_get_block);

 		if (unlikely((rw & WRITE) && ret < 0)) {
 			loff_t isize = i_size_read(inode);
@ -3575,6 +3573,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 			    ssize_t size, void *private, int ret,
 			    bool is_async)
 {
+	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
        ext4_io_end_t *io_end = iocb->private;
 	struct workqueue_struct *wq;
 	unsigned long flags;
@ -3596,6 +3595,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 out:
 		if (is_async)
 			aio_complete(iocb, ret, 0);
+		inode_dio_done(inode);
 		return;
 	}

@ -3616,6 +3616,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	/* queue the work to convert unwritten extents to written */
 	queue_work(wq, &io_end->work);
 	iocb->private = NULL;
+
+	/* XXX: probably should move into the real I/O completion handler */
+	inode_dio_done(inode);
 }

 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@ -3748,11 +3751,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 			EXT4_I(inode)->cur_aio_dio = iocb->private;
 		}

-		ret = blockdev_direct_IO(rw, iocb, inode,
+		ret = __blockdev_direct_IO(rw, iocb, inode,
 					 inode->i_sb->s_bdev, iov,
 					 offset, nr_segs,
 					 ext4_get_block_write,
-					 ext4_end_io_dio);
+					 ext4_end_io_dio,
+					 NULL,
+					 DIO_LOCKING | DIO_SKIP_HOLES);
 		if (iocb->private)
 			EXT4_I(inode)->cur_aio_dio = NULL;
 		/*
@ -5351,6 +5356,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	}

 	if (attr->ia_valid & ATTR_SIZE) {
+		inode_dio_wait(inode);
+
 		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
 			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

@ -5843,80 +5850,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct page *page = vmf->page;
 	loff_t size;
 	unsigned long len;
-	int ret = -EINVAL;
-	void *fsdata;
+	int ret;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
+	handle_t *handle;
+	get_block_t *get_block;
+	int retries = 0;

 	/*
-	 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
-	 * get i_mutex because we are already holding mmap_sem.
+	 * This check is racy but catches the common case. We rely on
+	 * __block_page_mkwrite() to do a reliable check.
 	 */
-	down_read(&inode->i_alloc_sem);
-	size = i_size_read(inode);
-	if (page->mapping != mapping || size <= page_offset(page)
-	    || !PageUptodate(page)) {
-		/* page got truncated from under us? */
-		goto out_unlock;
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+	/* Delalloc case is easy... */
+	if (test_opt(inode->i_sb, DELALLOC) &&
+	    !ext4_should_journal_data(inode) &&
+	    !ext4_nonda_switch(inode->i_sb)) {
+		do {
+			ret = __block_page_mkwrite(vma, vmf,
+						   ext4_da_get_block_prep);
+		} while (ret == -ENOSPC &&
+		       ext4_should_retry_alloc(inode->i_sb, &retries));
+		goto out_ret;
 	}
-	ret = 0;

 	lock_page(page);
-	wait_on_page_writeback(page);
-	if (PageMappedToDisk(page)) {
-		up_read(&inode->i_alloc_sem);
-		return VM_FAULT_LOCKED;
+	size = i_size_read(inode);
+	/* Page got truncated from under us? */
+	if (page->mapping != mapping || page_offset(page) > size) {
+		unlock_page(page);
+		ret = VM_FAULT_NOPAGE;
+		goto out;
 	}

 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
-
 	/*
-	 * return if we have all the buffers mapped. This avoid
-	 * the need to call write_begin/write_end which does a
-	 * journal_start/journal_stop which can block and take
-	 * long time
+	 * Return if we have all the buffers mapped. This avoids the need to do
+	 * journal_start/journal_stop which can block and take a long time
 	 */
 	if (page_has_buffers(page)) {
 		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
 					ext4_bh_unmapped)) {
-			up_read(&inode->i_alloc_sem);
-			return VM_FAULT_LOCKED;
+			/* Wait so that we don't change page under IO */
+			wait_on_page_writeback(page);
+			ret = VM_FAULT_LOCKED;
+			goto out;
 		}
 	}
 	unlock_page(page);
-	/*
-	 * OK, we need to fill the hole... Do write_begin write_end
-	 * to do block allocation/reservation.We are not holding
-	 * inode.i__mutex here. That allow * parallel write_begin,
-	 * write_end call. lock_page prevent this from happening
-	 * on the same page though
-	 */
-	ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
-			len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-	if (ret < 0)
-		goto out_unlock;
-	ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
-			len, len, page, fsdata);
-	if (ret < 0)
-		goto out_unlock;
-	ret = 0;
-
-	/*
-	 * write_begin/end might have created a dirty page and someone
-	 * could wander in and start the IO.  Make sure that hasn't
-	 * happened.
-	 */
-	lock_page(page);
-	wait_on_page_writeback(page);
-	up_read(&inode->i_alloc_sem);
-	return VM_FAULT_LOCKED;
-out_unlock:
-	if (ret)
+	/* OK, we need to fill the hole... */
+	if (ext4_should_dioread_nolock(inode))
+		get_block = ext4_get_block_write;
+	else
+		get_block = ext4_get_block;
+retry_alloc:
+	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
 		ret = VM_FAULT_SIGBUS;
-	up_read(&inode->i_alloc_sem);
+		goto out;
+	}
+	ret = __block_page_mkwrite(vma, vmf, get_block);
+	if (!ret && ext4_should_journal_data(inode)) {
+		if (walk_page_buffers(handle, page_buffers(page), 0,
+			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
+			unlock_page(page);
+			ret = VM_FAULT_SIGBUS;
+			goto out;
+		}
+		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+	}
+	ext4_journal_stop(handle);
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry_alloc;
+out_ret:
+	ret = block_page_mkwrite_return(ret);
+out:
 	return ret;
 }
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@ -1037,15 +1037,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
 			return ERR_PTR(-EIO);
 		}
 		inode = ext4_iget(dir->i_sb, ino);
-		if (IS_ERR(inode)) {
-			if (PTR_ERR(inode) == -ESTALE) {
+		if (inode == ERR_PTR(-ESTALE)) {
 			EXT4_ERROR_INODE(dir,
 					 "deleted inode referenced: %u",
 					 ino);
 			return ERR_PTR(-EIO);
-			} else {
-				return ERR_CAST(inode);
-			}
 		}
 	}
 	return d_splice_alias(inode, dentry);
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@ -109,6 +109,7 @@ struct msdos_inode_info {
 	int i_attrs;		/* unused attribute bits */
 	loff_t i_pos;		/* on-disk position of directory entry or 0 */
 	struct hlist_node i_fat_hash;	/* hash by i_location */
+	struct rw_semaphore truncate_lock; /* protect bmap against truncate */
 	struct inode vfs_inode;
 };

@ -309,7 +310,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
 extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
 extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		       struct kstat *stat);
-extern int fat_file_fsync(struct file *file, int datasync);
+extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
+			  int datasync);

 /* fat/inode.c */
 extern void fat_attach(struct inode *inode, loff_t i_pos);
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@ -149,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp)
 	return 0;
 }

-int fat_file_fsync(struct file *filp, int datasync)
+int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = filp->f_mapping->host;
 	int res, err;

-	res = generic_file_fsync(filp, datasync);
+	res = generic_file_fsync(filp, start, end, datasync);
 	err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);

 	return res ? res : err;
@ -397,6 +397,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	 * sequence.
 	 */
 	if (attr->ia_valid & ATTR_SIZE) {
+		inode_dio_wait(inode);
+
 		if (attr->ia_size > inode->i_size) {
 			error = fat_cont_expand(inode, attr->ia_size);
 			if (error || attr->ia_valid == ATTR_SIZE)
@ -429,8 +431,10 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	}

 	if (attr->ia_valid & ATTR_SIZE) {
+		down_write(&MSDOS_I(inode)->truncate_lock);
 		truncate_setsize(inode, attr->ia_size);
 		fat_truncate_blocks(inode, attr->ia_size);
+		up_write(&MSDOS_I(inode)->truncate_lock);
 	}

 	setattr_copy(inode, attr);
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@ -211,8 +211,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				 iov, offset, nr_segs, fat_get_block, NULL);
+	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+				 fat_get_block);
 	if (ret < 0 && (rw & WRITE))
 		fat_write_failed(mapping, offset + iov_length(iov, nr_segs));

@ -224,9 +224,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 	sector_t blocknr;

 	/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
-	down_read(&mapping->host->i_alloc_sem);
+	down_read(&MSDOS_I(mapping->host)->truncate_lock);
 	blocknr = generic_block_bmap(mapping, block, fat_get_block);
-	up_read(&mapping->host->i_alloc_sem);
+	up_read(&MSDOS_I(mapping->host)->truncate_lock);

 	return blocknr;
 }
@ -510,6 +510,8 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
+
+	init_rwsem(&ei->truncate_lock);
 	return &ei->vfs_inode;
 }

--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@ -209,29 +209,20 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
 	int err;

 	lock_super(sb);
-
 	err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
-	if (err) {
-		if (err == -ENOENT) {
+	switch (err) {
+	case -ENOENT:
 		inode = NULL;
-			goto out;
-		}
-		goto error;
-	}
-
+		break;
+	case 0:
 		inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
 		brelse(sinfo.bh);
-	if (IS_ERR(inode)) {
-		err = PTR_ERR(inode);
-		goto error;
+		break;
+	default:
+		inode = ERR_PTR(err);
 	}
-out:
 	unlock_super(sb);
 	return d_splice_alias(inode, dentry);
-
-error:
-	unlock_super(sb);
-	return ERR_PTR(err);
 }

 /***** Creates a directory entry (name is already formatted). */
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@ -82,10 +82,8 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
 	 * case sensitive name which is specified by user if this is
 	 * for creation.
 	 */
-	if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
 	if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
 		return 0;
-	}

 	return vfat_revalidate_shortname(dentry);
 }
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@ -460,32 +460,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }

-/*
- * For background writeback the caller does not have the sb pinned
- * before calling writeback. So make sure that we do pin it, so it doesn't
- * go away while we are writing inodes from it.
- */
-static bool pin_sb_for_writeback(struct super_block *sb)
-{
-	spin_lock(&sb_lock);
-	if (list_empty(&sb->s_instances)) {
-		spin_unlock(&sb_lock);
-		return false;
-	}
-
-	sb->s_count++;
-	spin_unlock(&sb_lock);
-
-	if (down_read_trylock(&sb->s_umount)) {
-		if (sb->s_root)
-			return true;
-		up_read(&sb->s_umount);
-	}
-
-	put_super(sb);
-	return false;
-}
-
 /*
 * Write a portion of b_io inodes which belong to @sb.
 *
@ -585,7 +559,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 		struct inode *inode = wb_inode(wb->b_io.prev);
 		struct super_block *sb = inode->i_sb;

-		if (!pin_sb_for_writeback(sb)) {
+		if (!grab_super_passive(sb)) {
 			requeue_io(inode);
 			continue;
 		}
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@ -382,7 +382,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	struct fuse_entry_out outentry;
 	struct fuse_file *ff;
 	struct file *file;
-	int flags = nd->intent.open.flags - 1;
+	int flags = nd->intent.open.flags;

 	if (fc->no_create)
 		return -ENOSYS;
@ -576,7 +576,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
 static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
 		       struct nameidata *nd)
 {
-	if (nd && (nd->flags & LOOKUP_OPEN)) {
+	if (nd) {
 		int err = fuse_create_open(dir, entry, mode, nd);
 		if (err != -ENOSYS)
 			return err;
@ -971,9 +971,9 @@ static int fuse_access(struct inode *inode, int mask)
 	return err;
 }

-static int fuse_perm_getattr(struct inode *inode, int flags)
+static int fuse_perm_getattr(struct inode *inode, int mask)
 {
-	if (flags & IPERM_FLAG_RCU)
+	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;

 	return fuse_do_getattr(inode, NULL, NULL);
@ -992,7 +992,7 @@ static int fuse_perm_getattr(struct inode *inode, int flags)
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 */
-static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
+static int fuse_permission(struct inode *inode, int mask)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	bool refreshed = false;
@ -1011,23 +1011,22 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
 		if (fi->i_time < get_jiffies_64()) {
 			refreshed = true;

-			err = fuse_perm_getattr(inode, flags);
+			err = fuse_perm_getattr(inode, mask);
 			if (err)
 				return err;
 		}
 	}

 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
-		err = generic_permission(inode, mask, flags, NULL);
+		err = generic_permission(inode, mask);

 		/* If permission is denied, try to refresh file
 		   attributes.  This is also needed, because the root
 		   node will at first have no permissions */
 		if (err == -EACCES && !refreshed) {
-			err = fuse_perm_getattr(inode, flags);
+			err = fuse_perm_getattr(inode, mask);
 			if (!err)
-				err = generic_permission(inode, mask,
-							flags, NULL);
+				err = generic_permission(inode, mask);
 		}

 		/* Note: the opposite of the above test does not
@ -1035,7 +1034,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
 		   noticed immediately, only after the attribute
 		   timeout has expired */
 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
-		if (flags & IPERM_FLAG_RCU)
+		if (mask & MAY_NOT_BLOCK)
 			return -ECHILD;

 		err = fuse_access(inode, mask);
@ -1044,7 +1043,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
 			if (refreshed)
 				return -EACCES;

-			err = fuse_perm_getattr(inode, flags);
+			err = fuse_perm_getattr(inode, mask);
 			if (!err && !(inode->i_mode & S_IXUGO))
 				return -EACCES;
 		}
@ -1177,9 +1176,10 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
 	return 0;
 }

-static int fuse_dir_fsync(struct file *file, int datasync)
+static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
+			  int datasync)
 {
-	return fuse_fsync_common(file, datasync, 1);
+	return fuse_fsync_common(file, start, end, datasync, 1);
 }

 static bool update_mtime(unsigned ivalid)
--- a/Show more
+++ b/Show more