From 44b1d53043c482225196e8a9cd9f35163a1b3336 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:41 +0200
Subject: [PATCH 01/12] vfs: add d_is_dir()

Add d_is_dir(dentry) helper which is analogous to S_ISDIR().

To avoid confusion, rename d_is_directory() to d_can_lookup().

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/namei.c             | 23 +++++++++++------------
 include/linux/dcache.h |  7 ++++++-
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 4b491b431990..e987ea7c305f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1796,7 +1796,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 			if (err)
 				return err;
 		}
-		if (!d_is_directory(nd->path.dentry)) {
+		if (!d_can_lookup(nd->path.dentry)) {
 			err = -ENOTDIR; 
 			break;
 		}
@@ -1817,7 +1817,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		struct dentry *root = nd->root.dentry;
 		struct inode *inode = root->d_inode;
 		if (*name) {
-			if (!d_is_directory(root))
+			if (!d_can_lookup(root))
 				return -ENOTDIR;
 			retval = inode_permission(inode, MAY_EXEC);
 			if (retval)
@@ -1873,7 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		dentry = f.file->f_path.dentry;
 
 		if (*name) {
-			if (!d_is_directory(dentry)) {
+			if (!d_can_lookup(dentry)) {
 				fdput(f);
 				return -ENOTDIR;
 			}
@@ -1955,7 +1955,7 @@ static int path_lookupat(int dfd, const char *name,
 		err = complete_walk(nd);
 
 	if (!err && nd->flags & LOOKUP_DIRECTORY) {
-		if (!d_is_directory(nd->path.dentry)) {
+		if (!d_can_lookup(nd->path.dentry)) {
 			path_put(&nd->path);
 			err = -ENOTDIR;
 		}
@@ -2414,11 +2414,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
 	    IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
 		return -EPERM;
 	if (isdir) {
-		if (!d_is_directory(victim) && !d_is_autodir(victim))
+		if (!d_is_dir(victim))
 			return -ENOTDIR;
 		if (IS_ROOT(victim))
 			return -EBUSY;
-	} else if (d_is_directory(victim) || d_is_autodir(victim))
+	} else if (d_is_dir(victim))
 		return -EISDIR;
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
@@ -3016,11 +3016,10 @@ static int do_last(struct nameidata *nd, struct path *path,
 	}
 	audit_inode(name, nd->path.dentry, 0);
 	error = -EISDIR;
-	if ((open_flag & O_CREAT) &&
-	    (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
+	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
 		goto out;
 	error = -ENOTDIR;
-	if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
+	if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
 		goto out;
 	if (!S_ISREG(nd->inode->i_mode))
 		will_truncate = false;
@@ -3744,7 +3743,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 slashes:
 	if (d_is_negative(dentry))
 		error = -ENOENT;
-	else if (d_is_directory(dentry) || d_is_autodir(dentry))
+	else if (d_is_dir(dentry))
 		error = -EISDIR;
 	else
 		error = -ENOTDIR;
@@ -4123,7 +4122,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode **delegated_inode)
 {
 	int error;
-	int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
+	int is_dir = d_is_dir(old_dentry);
 	const unsigned char *old_name;
 
 	if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4216,7 +4215,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 	if (d_is_negative(old_dentry))
 		goto exit4;
 	/* unless the source is a directory trailing slashes give -ENOTDIR */
-	if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
+	if (!d_is_dir(old_dentry)) {
 		error = -ENOTDIR;
 		if (oldnd.last.name[oldnd.last.len])
 			goto exit4;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bf72e9ac6de0..3b50cac7ccb3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -429,7 +429,7 @@ static inline unsigned __d_entry_type(const struct dentry *dentry)
 	return dentry->d_flags & DCACHE_ENTRY_TYPE;
 }
 
-static inline bool d_is_directory(const struct dentry *dentry)
+static inline bool d_can_lookup(const struct dentry *dentry)
 {
 	return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE;
 }
@@ -439,6 +439,11 @@ static inline bool d_is_autodir(const struct dentry *dentry)
 	return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE;
 }
 
+static inline bool d_is_dir(const struct dentry *dentry)
+{
+	return d_can_lookup(dentry) || d_is_autodir(dentry);
+}
+
 static inline bool d_is_symlink(const struct dentry *dentry)
 {
 	return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE;

From de22a4c3720a96f1c2ebf12b0857b6db6a991f2c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:42 +0200
Subject: [PATCH 02/12] vfs: rename: move d_move() up

Move the d_move() in vfs_rename_dir() up, similarly to how it's done in
vfs_rename_other().  The next patch will consolidate these two functions
and this is the only structural difference between them.

I'm not sure if doing the d_move() after the dput is even valid.  But there
may be a logical explanation for that.  But moving the d_move() before the
dput() (and the mutex_unlock()) should definitely not hurt.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/namei.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index e987ea7c305f..2e86d2c4ec8a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4045,13 +4045,12 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 		target->i_flags |= S_DEAD;
 		dont_mount(new_dentry);
 	}
+	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
+		d_move(old_dentry, new_dentry);
 out:
 	if (target)
 		mutex_unlock(&target->i_mutex);
 	dput(new_dentry);
-	if (!error)
-		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
-			d_move(old_dentry,new_dentry);
 	return error;
 }
 

From bc27027a73e8b80376b51a1583ad1c7445605e8a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:42 +0200
Subject: [PATCH 03/12] vfs: rename: use common code for dir and non-dir

There's actually very little difference between vfs_rename_dir() and
vfs_rename_other() so move both inline into vfs_rename() which still stays
reasonably readable.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/namei.c | 217 ++++++++++++++++++++++-------------------------------
 1 file changed, 90 insertions(+), 127 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 2e86d2c4ec8a..12b8f56ba942 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3973,7 +3973,27 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
 	return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
-/*
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir:	parent of source
+ * @old_dentry:	source
+ * @new_dir:	parent of destination
+ * @new_dentry:	destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename()).
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode.  The caller should then
+ * break the delegation and retry.  Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ *
  * The worst of all namespace operations - renaming directory. "Perverted"
  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
  * Problems:
@@ -4001,137 +4021,24 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
  *	   ->i_mutex on parents, which works but leads to some truly excessive
  *	   locking].
  */
-static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
-			  struct inode *new_dir, struct dentry *new_dentry)
-{
-	int error = 0;
-	struct inode *target = new_dentry->d_inode;
-	unsigned max_links = new_dir->i_sb->s_max_links;
-
-	/*
-	 * If we are going to change the parent - check write permissions,
-	 * we'll need to flip '..'.
-	 */
-	if (new_dir != old_dir) {
-		error = inode_permission(old_dentry->d_inode, MAY_WRITE);
-		if (error)
-			return error;
-	}
-
-	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
-	if (error)
-		return error;
-
-	dget(new_dentry);
-	if (target)
-		mutex_lock(&target->i_mutex);
-
-	error = -EBUSY;
-	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
-		goto out;
-
-	error = -EMLINK;
-	if (max_links && !target && new_dir != old_dir &&
-	    new_dir->i_nlink >= max_links)
-		goto out;
-
-	if (target)
-		shrink_dcache_parent(new_dentry);
-	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-	if (error)
-		goto out;
-
-	if (target) {
-		target->i_flags |= S_DEAD;
-		dont_mount(new_dentry);
-	}
-	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
-		d_move(old_dentry, new_dentry);
-out:
-	if (target)
-		mutex_unlock(&target->i_mutex);
-	dput(new_dentry);
-	return error;
-}
-
-static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-			    struct inode *new_dir, struct dentry *new_dentry,
-			    struct inode **delegated_inode)
-{
-	struct inode *target = new_dentry->d_inode;
-	struct inode *source = old_dentry->d_inode;
-	int error;
-
-	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
-	if (error)
-		return error;
-
-	dget(new_dentry);
-	lock_two_nondirectories(source, target);
-
-	error = -EBUSY;
-	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
-		goto out;
-
-	error = try_break_deleg(source, delegated_inode);
-	if (error)
-		goto out;
-	if (target) {
-		error = try_break_deleg(target, delegated_inode);
-		if (error)
-			goto out;
-	}
-	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-	if (error)
-		goto out;
-
-	if (target)
-		dont_mount(new_dentry);
-	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
-		d_move(old_dentry, new_dentry);
-out:
-	unlock_two_nondirectories(source, target);
-	dput(new_dentry);
-	return error;
-}
-
-/**
- * vfs_rename - rename a filesystem object
- * @old_dir:	parent of source
- * @old_dentry:	source
- * @new_dir:	parent of destination
- * @new_dentry:	destination
- * @delegated_inode: returns an inode needing a delegation break
- *
- * The caller must hold multiple mutexes--see lock_rename()).
- *
- * If vfs_rename discovers a delegation in need of breaking at either
- * the source or destination, it will return -EWOULDBLOCK and return a
- * reference to the inode in delegated_inode.  The caller should then
- * break the delegation and retry.  Because breaking a delegation may
- * take a long time, the caller should drop all locks before doing
- * so.
- *
- * Alternatively, a caller may pass NULL for delegated_inode.  This may
- * be appropriate for callers that expect the underlying filesystem not
- * to be NFS exported.
- */
 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry,
 	       struct inode **delegated_inode)
 {
 	int error;
-	int is_dir = d_is_dir(old_dentry);
+	bool is_dir = d_is_dir(old_dentry);
 	const unsigned char *old_name;
+	struct inode *source = old_dentry->d_inode;
+	struct inode *target = new_dentry->d_inode;
+
+	if (source == target)
+		return 0;
 
-	if (old_dentry->d_inode == new_dentry->d_inode)
- 		return 0;
- 
 	error = may_delete(old_dir, old_dentry, is_dir);
 	if (error)
 		return error;
 
-	if (!new_dentry->d_inode)
+	if (!target)
 		error = may_create(new_dir, new_dentry);
 	else
 		error = may_delete(new_dir, new_dentry, is_dir);
@@ -4141,15 +4048,71 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (!old_dir->i_op->rename)
 		return -EPERM;
 
-	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+	/*
+	 * If we are going to change the parent - check write permissions,
+	 * we'll need to flip '..'.
+	 */
+	if (is_dir && new_dir != old_dir) {
+		error = inode_permission(source, MAY_WRITE);
+		if (error)
+			return error;
+	}
 
-	if (is_dir)
-		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-	else
-		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
+	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
+	if (error)
+		return error;
+
+	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+	dget(new_dentry);
+	if (!is_dir)
+		lock_two_nondirectories(source, target);
+	else if (target)
+		mutex_lock(&target->i_mutex);
+
+	error = -EBUSY;
+	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
+		goto out;
+
+	if (is_dir) {
+		unsigned max_links = new_dir->i_sb->s_max_links;
+
+		error = -EMLINK;
+		if (max_links && !target && new_dir != old_dir &&
+		    new_dir->i_nlink >= max_links)
+			goto out;
+
+		if (target)
+			shrink_dcache_parent(new_dentry);
+	} else {
+		error = try_break_deleg(source, delegated_inode);
+		if (error)
+			goto out;
+		if (target) {
+			error = try_break_deleg(target, delegated_inode);
+			if (error)
+				goto out;
+		}
+	}
+	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	if (error)
+		goto out;
+
+	if (target) {
+		if (is_dir)
+			target->i_flags |= S_DEAD;
+		dont_mount(new_dentry);
+	}
+	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
+		d_move(old_dentry, new_dentry);
+out:
+	if (!is_dir)
+		unlock_two_nondirectories(source, target);
+	else if (target)
+		mutex_unlock(&target->i_mutex);
+	dput(new_dentry);
 	if (!error)
 		fsnotify_move(old_dir, new_dir, old_name, is_dir,
-			      new_dentry->d_inode, old_dentry);
+			      target, old_dentry);
 	fsnotify_oldname_free(old_name);
 
 	return error;

From 520c8b16505236fc82daa352e6c5e73cd9870cff Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:42 +0200
Subject: [PATCH 04/12] vfs: add renameat2 syscall

Add new renameat2 syscall, which is the same as renameat with an added
flags argument.

Pass flags to vfs_rename() and to i_op->rename() as well.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 Documentation/filesystems/Locking             |  6 +++-
 Documentation/filesystems/vfs.txt             | 16 +++++++++
 arch/x86/syscalls/syscall_64.tbl              |  1 +
 .../lustre/include/linux/lustre_compat25.h    |  4 +--
 .../staging/lustre/lustre/lvfs/lvfs_linux.c   |  2 +-
 fs/cachefiles/namei.c                         |  2 +-
 fs/ecryptfs/inode.c                           |  2 +-
 fs/namei.c                                    | 34 +++++++++++++++----
 fs/nfsd/vfs.c                                 |  2 +-
 include/linux/fs.h                            |  4 ++-
 10 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 5b0c083d7c0e..f424e0e5b46b 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -47,6 +47,8 @@ prototypes:
 	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
+	int (*rename2) (struct inode *, struct dentry *,
+			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
 	void * (*follow_link) (struct dentry *, struct nameidata *);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
@@ -78,6 +80,7 @@ mkdir:		yes
 unlink:		yes (both)
 rmdir:		yes (both)	(see below)
 rename:		yes (all)	(see below)
+rename2:	yes (all)	(see below)
 readlink:	no
 follow_link:	no
 put_link:	no
@@ -96,7 +99,8 @@ tmpfile:	no
 
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
-	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
+	cross-directory ->rename() and rename2() has (per-superblock)
+->s_vfs_rename_sem.
 
 See Documentation/filesystems/directory-locking for more detailed discussion
 of the locking scheme for directory operations.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index c53784c119c8..94eb86287bcb 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -347,6 +347,8 @@ struct inode_operations {
 	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
+	int (*rename2) (struct inode *, struct dentry *,
+			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
         void * (*follow_link) (struct dentry *, struct nameidata *);
         void (*put_link) (struct dentry *, struct nameidata *, void *);
@@ -414,6 +416,20 @@ otherwise noted.
   rename: called by the rename(2) system call to rename the object to
 	have the parent and name given by the second inode and dentry.
 
+  rename2: this has an additional flags argument compared to rename.
+	If no flags are supported by the filesystem then this method
+	need not be implemented.  If some flags are supported then the
+	filesystem must return -EINVAL for any unsupported or unknown
+	flags.  Currently the following flags are implemented:
+	(1) RENAME_NOREPLACE: this flag indicates that if the target
+	of the rename exists the rename should fail with -EEXIST
+	instead of replacing the target.  The VFS already checks for
+	existence, so for local filesystems the RENAME_NOREPLACE
+	implementation is equivalent to plain rename.
+	(2) RENAME_EXCHANGE: exchange source and target.  Both must
+	exist; this is checked by the VFS.  Unlike plain rename,
+	source and target may be of different type.
+
   readlink: called by the readlink(2) system call. Only required if
 	you want to support reading symbolic links
 
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a12bddc7ccea..04376ac3d9ef 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -322,6 +322,7 @@
 313	common	finit_module		sys_finit_module
 314	common	sched_setattr		sys_sched_setattr
 315	common	sched_getattr		sys_sched_getattr
+316	common	renameat2		sys_renameat2
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index eefdb8d061b1..81cc7a0134bb 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
 #define ll_vfs_unlink(inode,entry,mnt)	  vfs_unlink(inode,entry)
 #define ll_vfs_mknod(dir,entry,mnt,mode,dev)    vfs_mknod(dir,entry,mode,dev)
 #define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
-#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \
-		vfs_rename(old,old_dir,new,new_dir,delegated_inode)
+#define ll_vfs_rename(old, old_dir, mnt, new, new_dir, mnt1) \
+		vfs_rename(old, old_dir, new, new_dir, NULL, 0)
 
 #define cfs_bio_io_error(a,b)   bio_io_error((a))
 #define cfs_bio_endio(a,b,c)    bio_endio((a),(c))
diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
index 428ffd8c37b7..d50822be3230 100644
--- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
@@ -223,7 +223,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
 		GOTO(put_old, err = PTR_ERR(dchild_new));
 
 	err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
-			    dir->d_inode, dchild_new, mnt, NULL);
+			    dir->d_inode, dchild_new, mnt);
 
 	dput(dchild_new);
 put_old:
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index ca65f39dc8dc..31088a969351 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -396,7 +396,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 		cachefiles_io_error(cache, "Rename security error %d", ret);
 	} else {
 		ret = vfs_rename(dir->d_inode, rep,
-				 cache->graveyard->d_inode, grave, NULL);
+				 cache->graveyard->d_inode, grave, NULL, 0);
 		if (ret != 0 && ret != -ENOMEM)
 			cachefiles_io_error(cache,
 					    "Rename failed with error %d", ret);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index b167ca48b8ee..d4a9431ec73c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -641,7 +641,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 	rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
 			lower_new_dir_dentry->d_inode, lower_new_dentry,
-			NULL);
+			NULL, 0);
 	if (rc)
 		goto out_lock;
 	if (target_inode)
diff --git a/fs/namei.c b/fs/namei.c
index 12b8f56ba942..ab4e48c4a80a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3980,6 +3980,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
  * @new_dir:	parent of destination
  * @new_dentry:	destination
  * @delegated_inode: returns an inode needing a delegation break
+ * @flags:	rename flags
  *
  * The caller must hold multiple mutexes--see lock_rename()).
  *
@@ -4023,7 +4024,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
  */
 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry,
-	       struct inode **delegated_inode)
+	       struct inode **delegated_inode, unsigned int flags)
 {
 	int error;
 	bool is_dir = d_is_dir(old_dentry);
@@ -4048,6 +4049,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (!old_dir->i_op->rename)
 		return -EPERM;
 
+	if (flags && !old_dir->i_op->rename2)
+		return -EINVAL;
+
 	/*
 	 * If we are going to change the parent - check write permissions,
 	 * we'll need to flip '..'.
@@ -4093,7 +4097,13 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				goto out;
 		}
 	}
-	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	if (!flags) {
+		error = old_dir->i_op->rename(old_dir, old_dentry,
+					      new_dir, new_dentry);
+	} else {
+		error = old_dir->i_op->rename2(old_dir, old_dentry,
+					       new_dir, new_dentry, flags);
+	}
 	if (error)
 		goto out;
 
@@ -4118,8 +4128,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
-SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
-		int, newdfd, const char __user *, newname)
+SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
+		int, newdfd, const char __user *, newname, unsigned int, flags)
 {
 	struct dentry *old_dir, *new_dir;
 	struct dentry *old_dentry, *new_dentry;
@@ -4131,6 +4141,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 	unsigned int lookup_flags = 0;
 	bool should_retry = false;
 	int error;
+
+	if (flags)
+		return -EINVAL;
+
 retry:
 	from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
 	if (IS_ERR(from)) {
@@ -4202,8 +4216,8 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 	if (error)
 		goto exit5;
 	error = vfs_rename(old_dir->d_inode, old_dentry,
-				   new_dir->d_inode, new_dentry,
-				   &delegated_inode);
+			   new_dir->d_inode, new_dentry,
+			   &delegated_inode, flags);
 exit5:
 	dput(new_dentry);
 exit4:
@@ -4233,9 +4247,15 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 	return error;
 }
 
+SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
+		int, newdfd, const char __user *, newname)
+{
+	return sys_renameat2(olddfd, oldname, newdfd, newname, 0);
+}
+
 SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
 {
-	return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
+	return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6d7be3f80356..915808b36df7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1694,7 +1694,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 	if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
 		goto out_dput_new;
 
-	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
+	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
 	if (!host_err) {
 		host_err = commit_metadata(tfhp);
 		if (!host_err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 23b2a35d712e..3b3670e97da6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1460,7 +1460,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);
+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
 
 /*
  * VFS dentry helper functions.
@@ -1571,6 +1571,8 @@ struct inode_operations {
 	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
+	int (*rename2) (struct inode *, struct dentry *,
+			struct inode *, struct dentry *, unsigned int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);

From 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:43 +0200
Subject: [PATCH 05/12] vfs: add RENAME_NOREPLACE flag

If this flag is specified and the target of the rename exists then the
rename syscall fails with EEXIST.

The VFS does the existence checking, so it is trivial to enable for most
local filesystems.  This patch only enables it in ext4.

For network filesystems the VFS check is not enough as there may be a race
between a remote create and the rename, so these filesystems need to handle
this flag in their ->rename() implementations to ensure atomicity.

Andy writes about why this is useful:

"The trivial answer: to eliminate the race condition from 'mv -i'.

Another answer: there's a common pattern to atomically create a file
with contents: open a temporary file, write to it, optionally fsync
it, close it, then link(2) it to the final name, then unlink the
temporary file.

The reason to use link(2) is because it won't silently clobber the destination.

This is annoying:
 - It requires an extra system call that shouldn't be necessary.
 - It doesn't work on (IMO sensible) filesystems that don't support
hard links (e.g. vfat).
 - It's not atomic -- there's an intermediate state where both files exist.
 - It's ugly.

The new rename flag will make this totally sensible.

To be fair, on new enough kernels, you can also use O_TMPFILE and
linkat to achieve the same thing even more cleanly."

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/ext4/namei.c         | 11 +++++++++++
 fs/namei.c              | 31 ++++++++++++++++++-------------
 include/uapi/linux/fs.h |  2 ++
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index d050e043e884..5f19171b3e1f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3204,6 +3204,16 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return retval;
 }
 
+static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
+			struct inode *new_dir, struct dentry *new_dentry,
+			unsigned int flags)
+{
+	if (flags & ~RENAME_NOREPLACE)
+		return -EINVAL;
+
+	return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
 /*
  * directories can handle most operations...
  */
@@ -3218,6 +3228,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 	.mknod		= ext4_mknod,
 	.tmpfile	= ext4_tmpfile,
 	.rename		= ext4_rename,
+	.rename2	= ext4_rename2,
 	.setattr	= ext4_setattr,
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
diff --git a/fs/namei.c b/fs/namei.c
index ab4e48c4a80a..0e9d186b7f77 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4142,7 +4142,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	bool should_retry = false;
 	int error;
 
-	if (flags)
+	if (flags & ~RENAME_NOREPLACE)
 		return -EINVAL;
 
 retry:
@@ -4168,6 +4168,8 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 		goto exit2;
 
 	new_dir = newnd.path.dentry;
+	if (flags & RENAME_NOREPLACE)
+		error = -EEXIST;
 	if (newnd.last_type != LAST_NORM)
 		goto exit2;
 
@@ -4190,22 +4192,25 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	error = -ENOENT;
 	if (d_is_negative(old_dentry))
 		goto exit4;
-	/* unless the source is a directory trailing slashes give -ENOTDIR */
-	if (!d_is_dir(old_dentry)) {
-		error = -ENOTDIR;
-		if (oldnd.last.name[oldnd.last.len])
-			goto exit4;
-		if (newnd.last.name[newnd.last.len])
-			goto exit4;
-	}
-	/* source should not be ancestor of target */
-	error = -EINVAL;
-	if (old_dentry == trap)
-		goto exit4;
 	new_dentry = lookup_hash(&newnd);
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
 		goto exit4;
+	error = -EEXIST;
+	if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
+		goto exit5;
+	/* unless the source is a directory trailing slashes give -ENOTDIR */
+	if (!d_is_dir(old_dentry)) {
+		error = -ENOTDIR;
+		if (oldnd.last.name[oldnd.last.len])
+			goto exit5;
+		if (newnd.last.name[newnd.last.len])
+			goto exit5;
+	}
+	/* source should not be ancestor of target */
+	error = -EINVAL;
+	if (old_dentry == trap)
+		goto exit5;
 	/* target should not be an ancestor of source */
 	error = -ENOTEMPTY;
 	if (new_dentry == trap)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 6c28b61bb690..9250f4dd7d96 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -35,6 +35,8 @@
 #define SEEK_HOLE	4	/* seek to the next hole */
 #define SEEK_MAX	SEEK_HOLE
 
+#define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
+
 struct fstrim_range {
 	__u64 start;
 	__u64 len;

From 0b3974eb04c4874e85fa1d4fc70450d12f28611d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:43 +0200
Subject: [PATCH 06/12] security: add flags to rename hooks

Add flags to security_path_rename() and security_inode_rename() hooks.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/cachefiles/namei.c    |  2 +-
 fs/namei.c               |  5 +++--
 include/linux/security.h | 12 ++++++++----
 security/security.c      |  6 ++++--
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 31088a969351..6494d9f673aa 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -391,7 +391,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 	path.dentry = dir;
 	path_to_graveyard.mnt = cache->mnt;
 	path_to_graveyard.dentry = cache->graveyard;
-	ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
+	ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0);
 	if (ret < 0) {
 		cachefiles_io_error(cache, "Rename security error %d", ret);
 	} else {
diff --git a/fs/namei.c b/fs/namei.c
index 0e9d186b7f77..4096d589bb3f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4062,7 +4062,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			return error;
 	}
 
-	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
+	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
+				      flags);
 	if (error)
 		return error;
 
@@ -4217,7 +4218,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 		goto exit5;
 
 	error = security_path_rename(&oldnd.path, old_dentry,
-				     &newnd.path, new_dentry);
+				     &newnd.path, new_dentry, flags);
 	if (error)
 		goto exit5;
 	error = vfs_rename(old_dir->d_inode, old_dentry,
diff --git a/include/linux/security.h b/include/linux/security.h
index 2fc42d191f79..6478ce3252c7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1793,7 +1793,8 @@ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 int security_inode_rmdir(struct inode *dir, struct dentry *dentry);
 int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev);
 int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
-			  struct inode *new_dir, struct dentry *new_dentry);
+			  struct inode *new_dir, struct dentry *new_dentry,
+			  unsigned int flags);
 int security_inode_readlink(struct dentry *dentry);
 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
 int security_inode_permission(struct inode *inode, int mask);
@@ -2161,7 +2162,8 @@ static inline int security_inode_mknod(struct inode *dir,
 static inline int security_inode_rename(struct inode *old_dir,
 					 struct dentry *old_dentry,
 					 struct inode *new_dir,
-					 struct dentry *new_dentry)
+					 struct dentry *new_dentry,
+					 unsigned int flags)
 {
 	return 0;
 }
@@ -2955,7 +2957,8 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
 		       struct dentry *new_dentry);
 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
-			 struct path *new_dir, struct dentry *new_dentry);
+			 struct path *new_dir, struct dentry *new_dentry,
+			 unsigned int flags);
 int security_path_chmod(struct path *path, umode_t mode);
 int security_path_chown(struct path *path, kuid_t uid, kgid_t gid);
 int security_path_chroot(struct path *path);
@@ -3003,7 +3006,8 @@ static inline int security_path_link(struct dentry *old_dentry,
 static inline int security_path_rename(struct path *old_dir,
 				       struct dentry *old_dentry,
 				       struct path *new_dir,
-				       struct dentry *new_dentry)
+				       struct dentry *new_dentry,
+				       unsigned int flags)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 919cad93ac82..284fbc99aa9d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -433,7 +433,8 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
 }
 
 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
-			 struct path *new_dir, struct dentry *new_dentry)
+			 struct path *new_dir, struct dentry *new_dentry,
+			 unsigned int flags)
 {
 	if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
 		     (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
@@ -524,7 +525,8 @@ int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 
 int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
-			   struct inode *new_dir, struct dentry *new_dentry)
+			   struct inode *new_dir, struct dentry *new_dentry,
+			   unsigned int flags)
 {
         if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
             (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))

From 4fd699ae3fbca2ac760137e1d26f98a105f59f05 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 1 Apr 2014 17:08:43 +0200
Subject: [PATCH 07/12] vfs: lock_two_nondirectories: allow directory args

lock_two_nondirectories warned if either of its args was a directory.
Instead just ignore the directory args.  This is needed for locking in
cross rename.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/inode.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 4bcdad3c9361..73779c090f0d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -944,24 +944,22 @@ EXPORT_SYMBOL(unlock_new_inode);
 
 /**
  * lock_two_nondirectories - take two i_mutexes on non-directory objects
+ *
+ * Lock any non-NULL argument that is not a directory.
+ * Zero, one or two objects may be locked by this function.
+ *
  * @inode1: first inode to lock
  * @inode2: second inode to lock
  */
 void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 {
-	WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
-	if (inode1 == inode2 || !inode2) {
-		mutex_lock(&inode1->i_mutex);
-		return;
-	}
-	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
-	if (inode1 < inode2) {
+	if (inode1 > inode2)
+		swap(inode1, inode2);
+
+	if (inode1 && !S_ISDIR(inode1->i_mode))
 		mutex_lock(&inode1->i_mutex);
+	if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
 		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
-	} else {
-		mutex_lock(&inode2->i_mutex);
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2);
-	}
 }
 EXPORT_SYMBOL(lock_two_nondirectories);
 
@@ -972,8 +970,9 @@ EXPORT_SYMBOL(lock_two_nondirectories);
  */
 void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 {
-	mutex_unlock(&inode1->i_mutex);
-	if (inode2 && inode2 != inode1)
+	if (inode1 && !S_ISDIR(inode1->i_mode))
+		mutex_unlock(&inode1->i_mutex);
+	if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
 		mutex_unlock(&inode2->i_mutex);
 }
 EXPORT_SYMBOL(unlock_two_nondirectories);

From da1ce0670c14d8380e423a3239e562a1dc15fa9e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:43 +0200
Subject: [PATCH 08/12] vfs: add cross-rename

If flags contain RENAME_EXCHANGE then exchange source and destination files.
There's no restriction on the type of the files; e.g. a directory can be
exchanged with a symlink.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/dcache.c             |  50 +++++++++++++++----
 fs/namei.c              | 104 +++++++++++++++++++++++++++-------------
 include/linux/dcache.h  |   1 +
 include/uapi/linux/fs.h |   1 +
 security/security.c     |  16 +++++++
 5 files changed, 131 insertions(+), 41 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index ca02c13a84aa..66cba5a8a346 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2483,12 +2483,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
 			dentry->d_name.name = dentry->d_iname;
 		} else {
 			/*
-			 * Both are internal.  Just copy target to dentry
+			 * Both are internal.
 			 */
-			memcpy(dentry->d_iname, target->d_name.name,
-					target->d_name.len + 1);
-			dentry->d_name.len = target->d_name.len;
-			return;
+			unsigned int i;
+			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
+			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
+				swap(((long *) &dentry->d_iname)[i],
+				     ((long *) &target->d_iname)[i]);
+			}
 		}
 	}
 	swap(dentry->d_name.len, target->d_name.len);
@@ -2545,13 +2547,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
  * __d_move - move a dentry
  * @dentry: entry to move
  * @target: new dentry
+ * @exchange: exchange the two dentries
  *
  * Update the dcache to reflect the move of a file name. Negative
  * dcache entries should not be moved in this way. Caller must hold
  * rename_lock, the i_mutex of the source and target directories,
  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
  */
-static void __d_move(struct dentry * dentry, struct dentry * target)
+static void __d_move(struct dentry *dentry, struct dentry *target,
+		     bool exchange)
 {
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -2573,8 +2577,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 	__d_drop(dentry);
 	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
 
-	/* Unhash the target: dput() will then get rid of it */
+	/*
+	 * Unhash the target (d_delete() is not usable here).  If exchanging
+	 * the two dentries, then rehash onto the other's hash queue.
+	 */
 	__d_drop(target);
+	if (exchange) {
+		__d_rehash(target,
+			   d_hash(dentry->d_parent, dentry->d_name.hash));
+	}
 
 	list_del(&dentry->d_u.d_child);
 	list_del(&target->d_u.d_child);
@@ -2601,6 +2612,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 	write_seqcount_end(&dentry->d_seq);
 
 	dentry_unlock_parents_for_move(dentry, target);
+	if (exchange)
+		fsnotify_d_move(target);
 	spin_unlock(&target->d_lock);
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -2618,11 +2631,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 void d_move(struct dentry *dentry, struct dentry *target)
 {
 	write_seqlock(&rename_lock);
-	__d_move(dentry, target);
+	__d_move(dentry, target, false);
 	write_sequnlock(&rename_lock);
 }
 EXPORT_SYMBOL(d_move);
 
+/*
+ * d_exchange - exchange two dentries
+ * @dentry1: first dentry
+ * @dentry2: second dentry
+ */
+void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
+{
+	write_seqlock(&rename_lock);
+
+	WARN_ON(!dentry1->d_inode);
+	WARN_ON(!dentry2->d_inode);
+	WARN_ON(IS_ROOT(dentry1));
+	WARN_ON(IS_ROOT(dentry2));
+
+	__d_move(dentry1, dentry2, true);
+
+	write_sequnlock(&rename_lock);
+}
+
 /**
  * d_ancestor - search for an ancestor
  * @p1: ancestor dentry
@@ -2670,7 +2702,7 @@ static struct dentry *__d_unalias(struct inode *inode,
 	m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
 	if (likely(!d_mountpoint(alias))) {
-		__d_move(alias, dentry);
+		__d_move(alias, dentry, false);
 		ret = alias;
 	}
 out_err:
diff --git a/fs/namei.c b/fs/namei.c
index 4096d589bb3f..c1178880f23c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4031,6 +4031,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	const unsigned char *old_name;
 	struct inode *source = old_dentry->d_inode;
 	struct inode *target = new_dentry->d_inode;
+	bool new_is_dir = false;
+	unsigned max_links = new_dir->i_sb->s_max_links;
 
 	if (source == target)
 		return 0;
@@ -4039,10 +4041,16 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error)
 		return error;
 
-	if (!target)
+	if (!target) {
 		error = may_create(new_dir, new_dentry);
-	else
-		error = may_delete(new_dir, new_dentry, is_dir);
+	} else {
+		new_is_dir = d_is_dir(new_dentry);
+
+		if (!(flags & RENAME_EXCHANGE))
+			error = may_delete(new_dir, new_dentry, is_dir);
+		else
+			error = may_delete(new_dir, new_dentry, new_is_dir);
+	}
 	if (error)
 		return error;
 
@@ -4056,10 +4064,17 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * If we are going to change the parent - check write permissions,
 	 * we'll need to flip '..'.
 	 */
-	if (is_dir && new_dir != old_dir) {
-		error = inode_permission(source, MAY_WRITE);
-		if (error)
-			return error;
+	if (new_dir != old_dir) {
+		if (is_dir) {
+			error = inode_permission(source, MAY_WRITE);
+			if (error)
+				return error;
+		}
+		if ((flags & RENAME_EXCHANGE) && new_is_dir) {
+			error = inode_permission(target, MAY_WRITE);
+			if (error)
+				return error;
+		}
 	}
 
 	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
@@ -4069,7 +4084,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
 	dget(new_dentry);
-	if (!is_dir)
+	if (!is_dir || (flags & RENAME_EXCHANGE))
 		lock_two_nondirectories(source, target);
 	else if (target)
 		mutex_lock(&target->i_mutex);
@@ -4078,25 +4093,25 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
 		goto out;
 
-	if (is_dir) {
-		unsigned max_links = new_dir->i_sb->s_max_links;
-
+	if (max_links && new_dir != old_dir) {
 		error = -EMLINK;
-		if (max_links && !target && new_dir != old_dir &&
-		    new_dir->i_nlink >= max_links)
+		if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
 			goto out;
-
-		if (target)
-			shrink_dcache_parent(new_dentry);
-	} else {
+		if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
+		    old_dir->i_nlink >= max_links)
+			goto out;
+	}
+	if (is_dir && !(flags & RENAME_EXCHANGE) && target)
+		shrink_dcache_parent(new_dentry);
+	if (!is_dir) {
 		error = try_break_deleg(source, delegated_inode);
 		if (error)
 			goto out;
-		if (target) {
-			error = try_break_deleg(target, delegated_inode);
-			if (error)
-				goto out;
-		}
+	}
+	if (target && !new_is_dir) {
+		error = try_break_deleg(target, delegated_inode);
+		if (error)
+			goto out;
 	}
 	if (!flags) {
 		error = old_dir->i_op->rename(old_dir, old_dentry,
@@ -4108,22 +4123,31 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error)
 		goto out;
 
-	if (target) {
+	if (!(flags & RENAME_EXCHANGE) && target) {
 		if (is_dir)
 			target->i_flags |= S_DEAD;
 		dont_mount(new_dentry);
 	}
-	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
-		d_move(old_dentry, new_dentry);
+	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
+		if (!(flags & RENAME_EXCHANGE))
+			d_move(old_dentry, new_dentry);
+		else
+			d_exchange(old_dentry, new_dentry);
+	}
 out:
-	if (!is_dir)
+	if (!is_dir || (flags & RENAME_EXCHANGE))
 		unlock_two_nondirectories(source, target);
 	else if (target)
 		mutex_unlock(&target->i_mutex);
 	dput(new_dentry);
-	if (!error)
+	if (!error) {
 		fsnotify_move(old_dir, new_dir, old_name, is_dir,
-			      target, old_dentry);
+			      !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
+		if (flags & RENAME_EXCHANGE) {
+			fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
+				      new_is_dir, NULL, new_dentry);
+		}
+	}
 	fsnotify_oldname_free(old_name);
 
 	return error;
@@ -4143,7 +4167,10 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	bool should_retry = false;
 	int error;
 
-	if (flags & ~RENAME_NOREPLACE)
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+		return -EINVAL;
+
+	if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE))
 		return -EINVAL;
 
 retry:
@@ -4180,7 +4207,8 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 
 	oldnd.flags &= ~LOOKUP_PARENT;
 	newnd.flags &= ~LOOKUP_PARENT;
-	newnd.flags |= LOOKUP_RENAME_TARGET;
+	if (!(flags & RENAME_EXCHANGE))
+		newnd.flags |= LOOKUP_RENAME_TARGET;
 
 retry_deleg:
 	trap = lock_rename(new_dir, old_dir);
@@ -4200,12 +4228,23 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	error = -EEXIST;
 	if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
 		goto exit5;
+	if (flags & RENAME_EXCHANGE) {
+		error = -ENOENT;
+		if (d_is_negative(new_dentry))
+			goto exit5;
+
+		if (!d_is_dir(new_dentry)) {
+			error = -ENOTDIR;
+			if (newnd.last.name[newnd.last.len])
+				goto exit5;
+		}
+	}
 	/* unless the source is a directory trailing slashes give -ENOTDIR */
 	if (!d_is_dir(old_dentry)) {
 		error = -ENOTDIR;
 		if (oldnd.last.name[oldnd.last.len])
 			goto exit5;
-		if (newnd.last.name[newnd.last.len])
+		if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len])
 			goto exit5;
 	}
 	/* source should not be ancestor of target */
@@ -4213,7 +4252,8 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	if (old_dentry == trap)
 		goto exit5;
 	/* target should not be an ancestor of source */
-	error = -ENOTEMPTY;
+	if (!(flags & RENAME_EXCHANGE))
+		error = -ENOTEMPTY;
 	if (new_dentry == trap)
 		goto exit5;
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3b50cac7ccb3..3b9bfdb83ba6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -308,6 +308,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *);
 
 /* used for rename() and baskets */
 extern void d_move(struct dentry *, struct dentry *);
+extern void d_exchange(struct dentry *, struct dentry *);
 extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 
 /* appendix may either be NULL or be used for transname suffixes */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 9250f4dd7d96..ca1a11bb4443 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -36,6 +36,7 @@
 #define SEEK_MAX	SEEK_HOLE
 
 #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
+#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
 
 struct fstrim_range {
 	__u64 start;
diff --git a/security/security.c b/security/security.c
index 284fbc99aa9d..8b774f362a3d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -439,6 +439,14 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 	if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
 		     (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
 		return 0;
+
+	if (flags & RENAME_EXCHANGE) {
+		int err = security_ops->path_rename(new_dir, new_dentry,
+						    old_dir, old_dentry);
+		if (err)
+			return err;
+	}
+
 	return security_ops->path_rename(old_dir, old_dentry, new_dir,
 					 new_dentry);
 }
@@ -531,6 +539,14 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
             (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
 		return 0;
+
+	if (flags & RENAME_EXCHANGE) {
+		int err = security_ops->inode_rename(new_dir, new_dentry,
+						     old_dir, old_dentry);
+		if (err)
+			return err;
+	}
+
 	return security_ops->inode_rename(old_dir, old_dentry,
 					   new_dir, new_dentry);
 }

From c0d268c3661efdfc76dde90ff2bc12806410cd0a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:43 +0200
Subject: [PATCH 09/12] ext4: rename: create ext4_renament structure for local
 vars

Need to split up ext4_rename() into helpers but there are too many local
variables involved, so create a new structure.  This also, apparently,
makes the generated code size slightly smaller.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/namei.c | 211 ++++++++++++++++++++++++++----------------------
 1 file changed, 114 insertions(+), 97 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5f19171b3e1f..7193cea805ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3000,6 +3000,22 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
 	return ext4_get_first_inline_block(inode, parent_de, retval);
 }
 
+struct ext4_renament {
+	struct inode *dir;
+	struct dentry *dentry;
+	struct inode *inode;
+
+	/* entry for "dentry" */
+	struct buffer_head *bh;
+	struct ext4_dir_entry_2 *de;
+	int inlined;
+
+	/* entry for ".." in inode if it's a directory */
+	struct buffer_head *dir_bh;
+	struct ext4_dir_entry_2 *parent_de;
+	int dir_inlined;
+};
+
 /*
  * Anybody can rename anything with this: the permission checks are left to the
  * higher-level routines.
@@ -3012,193 +3028,194 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		       struct inode *new_dir, struct dentry *new_dentry)
 {
 	handle_t *handle = NULL;
-	struct inode *old_inode, *new_inode;
-	struct buffer_head *old_bh, *new_bh, *dir_bh;
-	struct ext4_dir_entry_2 *old_de, *new_de;
+	struct ext4_renament old = {
+		.dir = old_dir,
+		.dentry = old_dentry,
+		.inode = old_dentry->d_inode,
+	};
+	struct ext4_renament new = {
+		.dir = new_dir,
+		.dentry = new_dentry,
+		.inode = new_dentry->d_inode,
+	};
 	int retval;
-	int inlined = 0, new_inlined = 0;
-	struct ext4_dir_entry_2 *parent_de;
 
-	dquot_initialize(old_dir);
-	dquot_initialize(new_dir);
-
-	old_bh = new_bh = dir_bh = NULL;
+	dquot_initialize(old.dir);
+	dquot_initialize(new.dir);
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
-	if (new_dentry->d_inode)
-		dquot_initialize(new_dentry->d_inode);
+	if (new.inode)
+		dquot_initialize(new.inode);
 
-	old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL);
+	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
 	/*
 	 *  Check for inode number is _not_ due to possible IO errors.
 	 *  We might rmdir the source, keep it as pwd of some process
 	 *  and merrily kill the link to whatever was created under the
 	 *  same name. Goodbye sticky bit ;-<
 	 */
-	old_inode = old_dentry->d_inode;
 	retval = -ENOENT;
-	if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
+	if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
 		goto end_rename;
 
-	new_inode = new_dentry->d_inode;
-	new_bh = ext4_find_entry(new_dir, &new_dentry->d_name,
-				 &new_de, &new_inlined);
-	if (new_bh) {
-		if (!new_inode) {
-			brelse(new_bh);
-			new_bh = NULL;
+	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
+				 &new.de, &new.inlined);
+	if (new.bh) {
+		if (!new.inode) {
+			brelse(new.bh);
+			new.bh = NULL;
 		}
 	}
-	if (new_inode && !test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
-		ext4_alloc_da_blocks(old_inode);
+	if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
+		ext4_alloc_da_blocks(old.inode);
 
-	handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
-		(2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
+	handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
+		(2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
 		 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
 		ext4_handle_sync(handle);
 
-	if (S_ISDIR(old_inode->i_mode)) {
-		if (new_inode) {
+	if (S_ISDIR(old.inode->i_mode)) {
+		if (new.inode) {
 			retval = -ENOTEMPTY;
-			if (!empty_dir(new_inode))
+			if (!empty_dir(new.inode))
 				goto end_rename;
 		}
 		retval = -EIO;
-		dir_bh = ext4_get_first_dir_block(handle, old_inode,
-						  &retval, &parent_de,
-						  &inlined);
-		if (!dir_bh)
+		old.dir_bh = ext4_get_first_dir_block(handle, old.inode,
+						  &retval, &old.parent_de,
+						  &old.dir_inlined);
+		if (!old.dir_bh)
 			goto end_rename;
-		if (le32_to_cpu(parent_de->inode) != old_dir->i_ino)
+		if (le32_to_cpu(old.parent_de->inode) != old.dir->i_ino)
 			goto end_rename;
 		retval = -EMLINK;
-		if (!new_inode && new_dir != old_dir &&
-		    EXT4_DIR_LINK_MAX(new_dir))
+		if (!new.inode && new.dir != old.dir &&
+		    EXT4_DIR_LINK_MAX(new.dir))
 			goto end_rename;
-		BUFFER_TRACE(dir_bh, "get_write_access");
-		retval = ext4_journal_get_write_access(handle, dir_bh);
+		BUFFER_TRACE(old.dir_bh, "get_write_access");
+		retval = ext4_journal_get_write_access(handle, old.dir_bh);
 		if (retval)
 			goto end_rename;
 	}
-	if (!new_bh) {
-		retval = ext4_add_entry(handle, new_dentry, old_inode);
+	if (!new.bh) {
+		retval = ext4_add_entry(handle, new.dentry, old.inode);
 		if (retval)
 			goto end_rename;
 	} else {
-		BUFFER_TRACE(new_bh, "get write access");
-		retval = ext4_journal_get_write_access(handle, new_bh);
+		BUFFER_TRACE(new.bh, "get write access");
+		retval = ext4_journal_get_write_access(handle, new.bh);
 		if (retval)
 			goto end_rename;
-		new_de->inode = cpu_to_le32(old_inode->i_ino);
-		if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
+		new.de->inode = cpu_to_le32(old.inode->i_ino);
+		if (EXT4_HAS_INCOMPAT_FEATURE(new.dir->i_sb,
 					      EXT4_FEATURE_INCOMPAT_FILETYPE))
-			new_de->file_type = old_de->file_type;
-		new_dir->i_version++;
-		new_dir->i_ctime = new_dir->i_mtime =
-					ext4_current_time(new_dir);
-		ext4_mark_inode_dirty(handle, new_dir);
-		BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
-		if (!new_inlined) {
+			new.de->file_type = old.de->file_type;
+		new.dir->i_version++;
+		new.dir->i_ctime = new.dir->i_mtime =
+					ext4_current_time(new.dir);
+		ext4_mark_inode_dirty(handle, new.dir);
+		BUFFER_TRACE(new.bh, "call ext4_handle_dirty_metadata");
+		if (!new.inlined) {
 			retval = ext4_handle_dirty_dirent_node(handle,
-							       new_dir, new_bh);
+							       new.dir, new.bh);
 			if (unlikely(retval)) {
-				ext4_std_error(new_dir->i_sb, retval);
+				ext4_std_error(new.dir->i_sb, retval);
 				goto end_rename;
 			}
 		}
-		brelse(new_bh);
-		new_bh = NULL;
+		brelse(new.bh);
+		new.bh = NULL;
 	}
 
 	/*
 	 * Like most other Unix systems, set the ctime for inodes on a
 	 * rename.
 	 */
-	old_inode->i_ctime = ext4_current_time(old_inode);
-	ext4_mark_inode_dirty(handle, old_inode);
+	old.inode->i_ctime = ext4_current_time(old.inode);
+	ext4_mark_inode_dirty(handle, old.inode);
 
 	/*
 	 * ok, that's it
 	 */
-	if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
-	    old_de->name_len != old_dentry->d_name.len ||
-	    strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
-	    (retval = ext4_delete_entry(handle, old_dir,
-					old_de, old_bh)) == -ENOENT) {
-		/* old_de could have moved from under us during htree split, so
+	if (le32_to_cpu(old.de->inode) != old.inode->i_ino ||
+	    old.de->name_len != old.dentry->d_name.len ||
+	    strncmp(old.de->name, old.dentry->d_name.name, old.de->name_len) ||
+	    (retval = ext4_delete_entry(handle, old.dir,
+					old.de, old.bh)) == -ENOENT) {
+		/* old.de could have moved from under us during htree split, so
 		 * make sure that we are deleting the right entry.  We might
 		 * also be pointing to a stale entry in the unused part of
-		 * old_bh so just checking inum and the name isn't enough. */
+		 * old.bh so just checking inum and the name isn't enough. */
 		struct buffer_head *old_bh2;
 		struct ext4_dir_entry_2 *old_de2;
 
-		old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name,
+		old_bh2 = ext4_find_entry(old.dir, &old.dentry->d_name,
 					  &old_de2, NULL);
 		if (old_bh2) {
-			retval = ext4_delete_entry(handle, old_dir,
+			retval = ext4_delete_entry(handle, old.dir,
 						   old_de2, old_bh2);
 			brelse(old_bh2);
 		}
 	}
 	if (retval) {
-		ext4_warning(old_dir->i_sb,
+		ext4_warning(old.dir->i_sb,
 				"Deleting old file (%lu), %d, error=%d",
-				old_dir->i_ino, old_dir->i_nlink, retval);
+				old.dir->i_ino, old.dir->i_nlink, retval);
 	}
 
-	if (new_inode) {
-		ext4_dec_count(handle, new_inode);
-		new_inode->i_ctime = ext4_current_time(new_inode);
+	if (new.inode) {
+		ext4_dec_count(handle, new.inode);
+		new.inode->i_ctime = ext4_current_time(new.inode);
 	}
-	old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
-	ext4_update_dx_flag(old_dir);
-	if (dir_bh) {
-		parent_de->inode = cpu_to_le32(new_dir->i_ino);
-		BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-		if (!inlined) {
-			if (is_dx(old_inode)) {
+	old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir);
+	ext4_update_dx_flag(old.dir);
+	if (old.dir_bh) {
+		old.parent_de->inode = cpu_to_le32(new.dir->i_ino);
+		BUFFER_TRACE(old.dir_bh, "call ext4_handle_dirty_metadata");
+		if (!old.dir_inlined) {
+			if (is_dx(old.inode)) {
 				retval = ext4_handle_dirty_dx_node(handle,
-								   old_inode,
-								   dir_bh);
+								   old.inode,
+								   old.dir_bh);
 			} else {
 				retval = ext4_handle_dirty_dirent_node(handle,
-							old_inode, dir_bh);
+							old.inode, old.dir_bh);
 			}
 		} else {
-			retval = ext4_mark_inode_dirty(handle, old_inode);
+			retval = ext4_mark_inode_dirty(handle, old.inode);
 		}
 		if (retval) {
-			ext4_std_error(old_dir->i_sb, retval);
+			ext4_std_error(old.dir->i_sb, retval);
 			goto end_rename;
 		}
-		ext4_dec_count(handle, old_dir);
-		if (new_inode) {
+		ext4_dec_count(handle, old.dir);
+		if (new.inode) {
 			/* checked empty_dir above, can't have another parent,
 			 * ext4_dec_count() won't work for many-linked dirs */
-			clear_nlink(new_inode);
+			clear_nlink(new.inode);
 		} else {
-			ext4_inc_count(handle, new_dir);
-			ext4_update_dx_flag(new_dir);
-			ext4_mark_inode_dirty(handle, new_dir);
+			ext4_inc_count(handle, new.dir);
+			ext4_update_dx_flag(new.dir);
+			ext4_mark_inode_dirty(handle, new.dir);
 		}
 	}
-	ext4_mark_inode_dirty(handle, old_dir);
-	if (new_inode) {
-		ext4_mark_inode_dirty(handle, new_inode);
-		if (!new_inode->i_nlink)
-			ext4_orphan_add(handle, new_inode);
+	ext4_mark_inode_dirty(handle, old.dir);
+	if (new.inode) {
+		ext4_mark_inode_dirty(handle, new.inode);
+		if (!new.inode->i_nlink)
+			ext4_orphan_add(handle, new.inode);
 	}
 	retval = 0;
 
 end_rename:
-	brelse(dir_bh);
-	brelse(old_bh);
-	brelse(new_bh);
+	brelse(old.dir_bh);
+	brelse(old.bh);
+	brelse(new.bh);
 	if (handle)
 		ext4_journal_stop(handle);
 	return retval;

From 0d7d5d678bf9e07dffe22b018cf035d511d9e86e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:44 +0200
Subject: [PATCH 10/12] ext4: rename: move EMLINK check up

Move checking i_nlink from after ext4_get_first_dir_block() to before.  The
check doesn't rely on the result of that function and the function only
fails on fs corruption, so the order shouldn't matter.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/namei.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7193cea805ff..87a8a6e613ba 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3084,6 +3084,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 			retval = -ENOTEMPTY;
 			if (!empty_dir(new.inode))
 				goto end_rename;
+		} else {
+			retval = -EMLINK;
+			if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
+				goto end_rename;
 		}
 		retval = -EIO;
 		old.dir_bh = ext4_get_first_dir_block(handle, old.inode,
@@ -3093,10 +3097,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto end_rename;
 		if (le32_to_cpu(old.parent_de->inode) != old.dir->i_ino)
 			goto end_rename;
-		retval = -EMLINK;
-		if (!new.inode && new.dir != old.dir &&
-		    EXT4_DIR_LINK_MAX(new.dir))
-			goto end_rename;
 		BUFFER_TRACE(old.dir_bh, "get_write_access");
 		retval = ext4_journal_get_write_access(handle, old.dir_bh);
 		if (retval)

From bd1af145b99311242673b32dff4599ce614352be Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:44 +0200
Subject: [PATCH 11/12] ext4: rename: split out helper functions

Cross rename (exchange source and dest) will need to call some of these
helpers for both source and dest, while overwriting rename currently only
calls them for one or the other.  This also makes the code easier to
follow.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/namei.c | 199 ++++++++++++++++++++++++++++++------------------
 1 file changed, 126 insertions(+), 73 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 87a8a6e613ba..75f1bde43dcc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3016,6 +3016,125 @@ struct ext4_renament {
 	int dir_inlined;
 };
 
+static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
+{
+	int retval;
+
+	ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode,
+					      &retval, &ent->parent_de,
+					      &ent->dir_inlined);
+	if (!ent->dir_bh)
+		return retval;
+	if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
+		return -EIO;
+	BUFFER_TRACE(ent->dir_bh, "get_write_access");
+	return ext4_journal_get_write_access(handle, ent->dir_bh);
+}
+
+static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
+				  unsigned dir_ino)
+{
+	int retval;
+
+	ent->parent_de->inode = cpu_to_le32(dir_ino);
+	BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata");
+	if (!ent->dir_inlined) {
+		if (is_dx(ent->inode)) {
+			retval = ext4_handle_dirty_dx_node(handle,
+							   ent->inode,
+							   ent->dir_bh);
+		} else {
+			retval = ext4_handle_dirty_dirent_node(handle,
+							       ent->inode,
+							       ent->dir_bh);
+		}
+	} else {
+		retval = ext4_mark_inode_dirty(handle, ent->inode);
+	}
+	if (retval) {
+		ext4_std_error(ent->dir->i_sb, retval);
+		return retval;
+	}
+	return 0;
+}
+
+static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
+		       unsigned ino, unsigned file_type)
+{
+	int retval;
+
+	BUFFER_TRACE(ent->bh, "get write access");
+	retval = ext4_journal_get_write_access(handle, ent->bh);
+	if (retval)
+		return retval;
+	ent->de->inode = cpu_to_le32(ino);
+	if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb,
+				      EXT4_FEATURE_INCOMPAT_FILETYPE))
+		ent->de->file_type = file_type;
+	ent->dir->i_version++;
+	ent->dir->i_ctime = ent->dir->i_mtime =
+		ext4_current_time(ent->dir);
+	ext4_mark_inode_dirty(handle, ent->dir);
+	BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
+	if (!ent->inlined) {
+		retval = ext4_handle_dirty_dirent_node(handle,
+						       ent->dir, ent->bh);
+		if (unlikely(retval)) {
+			ext4_std_error(ent->dir->i_sb, retval);
+			return retval;
+		}
+	}
+	brelse(ent->bh);
+	ent->bh = NULL;
+
+	return 0;
+}
+
+static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
+				  const struct qstr *d_name)
+{
+	int retval = -ENOENT;
+	struct buffer_head *bh;
+	struct ext4_dir_entry_2 *de;
+
+	bh = ext4_find_entry(dir, d_name, &de, NULL);
+	if (bh) {
+		retval = ext4_delete_entry(handle, dir, de, bh);
+		brelse(bh);
+	}
+	return retval;
+}
+
+static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
+{
+	int retval;
+	/*
+	 * ent->de could have moved from under us during htree split, so make
+	 * sure that we are deleting the right entry.  We might also be pointing
+	 * to a stale entry in the unused part of ent->bh so just checking inum
+	 * and the name isn't enough.
+	 */
+	if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
+	    ent->de->name_len != ent->dentry->d_name.len ||
+	    strncmp(ent->de->name, ent->dentry->d_name.name,
+		    ent->de->name_len)) {
+		retval = ext4_find_delete_entry(handle, ent->dir,
+						&ent->dentry->d_name);
+	} else {
+		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
+		if (retval == -ENOENT) {
+			retval = ext4_find_delete_entry(handle, ent->dir,
+							&ent->dentry->d_name);
+		}
+	}
+
+	if (retval) {
+		ext4_warning(ent->dir->i_sb,
+				"Deleting old file (%lu), %d, error=%d",
+				ent->dir->i_ino, ent->dir->i_nlink, retval);
+	}
+}
+
 /*
  * Anybody can rename anything with this: the permission checks are left to the
  * higher-level routines.
@@ -3089,16 +3208,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 			if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
 				goto end_rename;
 		}
-		retval = -EIO;
-		old.dir_bh = ext4_get_first_dir_block(handle, old.inode,
-						  &retval, &old.parent_de,
-						  &old.dir_inlined);
-		if (!old.dir_bh)
-			goto end_rename;
-		if (le32_to_cpu(old.parent_de->inode) != old.dir->i_ino)
-			goto end_rename;
-		BUFFER_TRACE(old.dir_bh, "get_write_access");
-		retval = ext4_journal_get_write_access(handle, old.dir_bh);
+		retval = ext4_rename_dir_prepare(handle, &old);
 		if (retval)
 			goto end_rename;
 	}
@@ -3107,29 +3217,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (retval)
 			goto end_rename;
 	} else {
-		BUFFER_TRACE(new.bh, "get write access");
-		retval = ext4_journal_get_write_access(handle, new.bh);
+		retval = ext4_setent(handle, &new,
+				     old.inode->i_ino, old.de->file_type);
 		if (retval)
 			goto end_rename;
-		new.de->inode = cpu_to_le32(old.inode->i_ino);
-		if (EXT4_HAS_INCOMPAT_FEATURE(new.dir->i_sb,
-					      EXT4_FEATURE_INCOMPAT_FILETYPE))
-			new.de->file_type = old.de->file_type;
-		new.dir->i_version++;
-		new.dir->i_ctime = new.dir->i_mtime =
-					ext4_current_time(new.dir);
-		ext4_mark_inode_dirty(handle, new.dir);
-		BUFFER_TRACE(new.bh, "call ext4_handle_dirty_metadata");
-		if (!new.inlined) {
-			retval = ext4_handle_dirty_dirent_node(handle,
-							       new.dir, new.bh);
-			if (unlikely(retval)) {
-				ext4_std_error(new.dir->i_sb, retval);
-				goto end_rename;
-			}
-		}
-		brelse(new.bh);
-		new.bh = NULL;
 	}
 
 	/*
@@ -3142,31 +3233,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	/*
 	 * ok, that's it
 	 */
-	if (le32_to_cpu(old.de->inode) != old.inode->i_ino ||
-	    old.de->name_len != old.dentry->d_name.len ||
-	    strncmp(old.de->name, old.dentry->d_name.name, old.de->name_len) ||
-	    (retval = ext4_delete_entry(handle, old.dir,
-					old.de, old.bh)) == -ENOENT) {
-		/* old.de could have moved from under us during htree split, so
-		 * make sure that we are deleting the right entry.  We might
-		 * also be pointing to a stale entry in the unused part of
-		 * old.bh so just checking inum and the name isn't enough. */
-		struct buffer_head *old_bh2;
-		struct ext4_dir_entry_2 *old_de2;
-
-		old_bh2 = ext4_find_entry(old.dir, &old.dentry->d_name,
-					  &old_de2, NULL);
-		if (old_bh2) {
-			retval = ext4_delete_entry(handle, old.dir,
-						   old_de2, old_bh2);
-			brelse(old_bh2);
-		}
-	}
-	if (retval) {
-		ext4_warning(old.dir->i_sb,
-				"Deleting old file (%lu), %d, error=%d",
-				old.dir->i_ino, old.dir->i_nlink, retval);
-	}
+	ext4_rename_delete(handle, &old);
 
 	if (new.inode) {
 		ext4_dec_count(handle, new.inode);
@@ -3175,24 +3242,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir);
 	ext4_update_dx_flag(old.dir);
 	if (old.dir_bh) {
-		old.parent_de->inode = cpu_to_le32(new.dir->i_ino);
-		BUFFER_TRACE(old.dir_bh, "call ext4_handle_dirty_metadata");
-		if (!old.dir_inlined) {
-			if (is_dx(old.inode)) {
-				retval = ext4_handle_dirty_dx_node(handle,
-								   old.inode,
-								   old.dir_bh);
-			} else {
-				retval = ext4_handle_dirty_dirent_node(handle,
-							old.inode, old.dir_bh);
-			}
-		} else {
-			retval = ext4_mark_inode_dirty(handle, old.inode);
-		}
-		if (retval) {
-			ext4_std_error(old.dir->i_sb, retval);
+		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
+		if (retval)
 			goto end_rename;
-		}
+
 		ext4_dec_count(handle, old.dir);
 		if (new.inode) {
 			/* checked empty_dir above, can't have another parent,

From bd42998a6bcb9b1708dac9ca9876e3d304c16f3d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:44 +0200
Subject: [PATCH 12/12] ext4: add cross rename support

Implement RENAME_EXCHANGE flag in renameat2 syscall.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/namei.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 138 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 75f1bde43dcc..1cb84f78909e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3004,6 +3004,8 @@ struct ext4_renament {
 	struct inode *dir;
 	struct dentry *dentry;
 	struct inode *inode;
+	bool is_dir;
+	int dir_nlink_delta;
 
 	/* entry for "dentry" */
 	struct buffer_head *bh;
@@ -3135,6 +3137,17 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
 	}
 }
 
+static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
+{
+	if (ent->dir_nlink_delta) {
+		if (ent->dir_nlink_delta == -1)
+			ext4_dec_count(handle, ent->dir);
+		else
+			ext4_inc_count(handle, ent->dir);
+		ext4_mark_inode_dirty(handle, ent->dir);
+	}
+}
+
 /*
  * Anybody can rename anything with this: the permission checks are left to the
  * higher-level routines.
@@ -3274,13 +3287,137 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return retval;
 }
 
+static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+			     struct inode *new_dir, struct dentry *new_dentry)
+{
+	handle_t *handle = NULL;
+	struct ext4_renament old = {
+		.dir = old_dir,
+		.dentry = old_dentry,
+		.inode = old_dentry->d_inode,
+	};
+	struct ext4_renament new = {
+		.dir = new_dir,
+		.dentry = new_dentry,
+		.inode = new_dentry->d_inode,
+	};
+	u8 new_file_type;
+	int retval;
+
+	dquot_initialize(old.dir);
+	dquot_initialize(new.dir);
+
+	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
+				 &old.de, &old.inlined);
+	/*
+	 *  Check for inode number is _not_ due to possible IO errors.
+	 *  We might rmdir the source, keep it as pwd of some process
+	 *  and merrily kill the link to whatever was created under the
+	 *  same name. Goodbye sticky bit ;-<
+	 */
+	retval = -ENOENT;
+	if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
+		goto end_rename;
+
+	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
+				 &new.de, &new.inlined);
+
+	/* RENAME_EXCHANGE case: old *and* new must both exist */
+	if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
+		goto end_rename;
+
+	handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
+		(2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
+		 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
+		ext4_handle_sync(handle);
+
+	if (S_ISDIR(old.inode->i_mode)) {
+		old.is_dir = true;
+		retval = ext4_rename_dir_prepare(handle, &old);
+		if (retval)
+			goto end_rename;
+	}
+	if (S_ISDIR(new.inode->i_mode)) {
+		new.is_dir = true;
+		retval = ext4_rename_dir_prepare(handle, &new);
+		if (retval)
+			goto end_rename;
+	}
+
+	/*
+	 * Other than the special case of overwriting a directory, parents'
+	 * nlink only needs to be modified if this is a cross directory rename.
+	 */
+	if (old.dir != new.dir && old.is_dir != new.is_dir) {
+		old.dir_nlink_delta = old.is_dir ? -1 : 1;
+		new.dir_nlink_delta = -old.dir_nlink_delta;
+		retval = -EMLINK;
+		if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) ||
+		    (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir)))
+			goto end_rename;
+	}
+
+	new_file_type = new.de->file_type;
+	retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type);
+	if (retval)
+		goto end_rename;
+
+	retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type);
+	if (retval)
+		goto end_rename;
+
+	/*
+	 * Like most other Unix systems, set the ctime for inodes on a
+	 * rename.
+	 */
+	old.inode->i_ctime = ext4_current_time(old.inode);
+	new.inode->i_ctime = ext4_current_time(new.inode);
+	ext4_mark_inode_dirty(handle, old.inode);
+	ext4_mark_inode_dirty(handle, new.inode);
+
+	if (old.dir_bh) {
+		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
+		if (retval)
+			goto end_rename;
+	}
+	if (new.dir_bh) {
+		retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino);
+		if (retval)
+			goto end_rename;
+	}
+	ext4_update_dir_count(handle, &old);
+	ext4_update_dir_count(handle, &new);
+	retval = 0;
+
+end_rename:
+	brelse(old.dir_bh);
+	brelse(new.dir_bh);
+	brelse(old.bh);
+	brelse(new.bh);
+	if (handle)
+		ext4_journal_stop(handle);
+	return retval;
+}
+
 static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
 			struct inode *new_dir, struct dentry *new_dentry,
 			unsigned int flags)
 {
-	if (flags & ~RENAME_NOREPLACE)
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
 		return -EINVAL;
 
+	if (flags & RENAME_EXCHANGE) {
+		return ext4_cross_rename(old_dir, old_dentry,
+					 new_dir, new_dentry);
+	}
+	/*
+	 * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE"
+	 * is equivalent to regular rename.
+	 */
 	return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
 }