From 2853908a59603118f854b813da055c26257dfa4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 13:49:56 -0400 Subject: [PATCH 01/26] undo "fs: allow d_instantiate to be called with negative parent dentry" Background: spufs used to mangle the order in which it had been building dentry trees. It was broken in a lot of ways, but most of them required the right timing to trigger until an fsnotify change had added one more - the one that was always triggered. Unfortunately, insteading of fixing their long-standing bug the spufs folks had chosen to paper over the fsnotify trigger. Eventually said bug had been spotted and killed off, but the pointless check in fsnotify has remained, complete with the implication that one *could* do that kind of crap. Again, a parent of any dentry should always be positive. Any code can rely upon that and anything violating that assert is a bug, *not* something to be accomodated. Signed-off-by: Al Viro --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 29f917517299..d188a107c562 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -281,7 +281,7 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) * us with the new state. */ parent = dentry->d_parent; - if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) + if (fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; From 07a8e62fde3c17e3d25e397cb5028176756fa316 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 14:52:22 -0400 Subject: [PATCH 02/26] drbd: ->d_parent is never NULL or negative Signed-off-by: Al Viro --- drivers/block/drbd/drbd_debugfs.c | 4 ---- security/inode.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index 4de95bbff486..71cdce707ff2 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -430,9 +430,6 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo /* Are we still linked, * or has debugfs_remove() already been called? */ parent = file->f_path.dentry->d_parent; - /* not sure if this can happen: */ - if (!parent || d_really_is_negative(parent)) - goto out; /* serialize with d_delete() */ inode_lock(d_inode(parent)); /* Make sure the object is still alive */ @@ -445,7 +442,6 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo if (ret) kref_put(kref, release); } -out: return ret; } diff --git a/security/inode.c b/security/inode.c index 28414b0207ce..526f82269cb0 100644 --- a/security/inode.c +++ b/security/inode.c @@ -191,10 +191,6 @@ void securityfs_remove(struct dentry *dentry) if (!dentry || IS_ERR(dentry)) return; - parent = dentry->d_parent; - if (!parent || d_really_is_negative(parent)) - return; - inode_lock(d_inode(parent)); if (simple_positive(dentry)) { if (d_is_dir(dentry)) From 4093d306a91f240f613f8b2c7c6843f4414adf8c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 14:54:04 -0400 Subject: [PATCH 03/26] securityfs: ->d_parent is never NULL or negative Signed-off-by: Al Viro --- security/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/security/inode.c b/security/inode.c index 526f82269cb0..e3df905ab5b1 100644 --- a/security/inode.c +++ b/security/inode.c @@ -186,20 +186,21 @@ EXPORT_SYMBOL_GPL(securityfs_create_dir); */ void securityfs_remove(struct dentry *dentry) { - struct dentry *parent; + struct inode *dir; if (!dentry || IS_ERR(dentry)) return; - inode_lock(d_inode(parent)); + dir = d_inode(dentry->d_parent); + inode_lock(dir); if (simple_positive(dentry)) { if (d_is_dir(dentry)) - simple_rmdir(d_inode(parent), dentry); + simple_rmdir(dir, dentry); else - simple_unlink(d_inode(parent), dentry); + simple_unlink(dir, dentry); dput(dentry); } - inode_unlock(d_inode(parent)); + inode_unlock(dir); simple_release_fs(&mount, &mount_count); } EXPORT_SYMBOL_GPL(securityfs_remove); From 0ba3353c2218f5311a163944e8ce347d6503c188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 14:57:53 -0400 Subject: [PATCH 04/26] tracefs: ->d_parent is never NULL or negative... Signed-off-by: Al Viro --- fs/tracefs/inode.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 4a0e48f92104..ad40b64c5e2f 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -541,9 +541,6 @@ void tracefs_remove(struct dentry *dentry) return; parent = dentry->d_parent; - if (!parent || !parent->d_inode) - return; - inode_lock(parent->d_inode); ret = __tracefs_remove(dentry, parent); inode_unlock(parent->d_inode); @@ -566,10 +563,6 @@ void tracefs_remove_recursive(struct dentry *dentry) if (IS_ERR_OR_NULL(dentry)) return; - parent = dentry->d_parent; - if (!parent || !parent->d_inode) - return; - parent = dentry; down: inode_lock(parent->d_inode); From 96b0cffbafb4671ac6d07b83a79b30077fe91525 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 15:00:34 -0400 Subject: [PATCH 05/26] orangefs: don't open-code %pd2 Signed-off-by: Al Viro --- fs/orangefs/namei.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 5a60c508af4e..930894ae7e1d 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -405,12 +405,8 @@ static int orangefs_rename(struct inode *old_dir, int ret; gossip_debug(GOSSIP_NAME_DEBUG, - "orangefs_rename: called (%s/%s => %s/%s) ct=%d\n", - old_dentry->d_parent->d_name.name, - old_dentry->d_name.name, - new_dentry->d_parent->d_name.name, - new_dentry->d_name.name, - d_count(new_dentry)); + "orangefs_rename: called (%pd2 => %pd2) ct=%d\n", + old_dentry, new_dentry, d_count(new_dentry)); new_op = op_alloc(ORANGEFS_VFS_OP_RENAME); if (!new_op) From bf1309142088e92952a8a6e97288a6c26243154d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 15:14:14 -0400 Subject: [PATCH 06/26] nfs_lookup(): remove debris left over from old sillyunlink exclusion Signed-off-by: Al Viro --- fs/nfs/dir.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index aaf7bd0cbae2..8be0a2438a93 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1363,7 +1363,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; - struct dentry *parent; struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; @@ -1393,20 +1392,18 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in if (IS_ERR(label)) goto out; - parent = dentry->d_parent; - /* Protect against concurrent sillydeletes */ trace_nfs_lookup_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; if (error < 0) { res = ERR_PTR(error); - goto out_unblock_sillyrename; + goto out_label; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); res = ERR_CAST(inode); if (IS_ERR(res)) - goto out_unblock_sillyrename; + goto out_label; /* Success: notify readdir to use READDIRPLUS */ nfs_advise_use_readdirplus(dir); @@ -1415,11 +1412,11 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in res = d_splice_alias(inode, dentry); if (res != NULL) { if (IS_ERR(res)) - goto out_unblock_sillyrename; + goto out_label; dentry = res; } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); -out_unblock_sillyrename: +out_label: trace_nfs_lookup_exit(dir, dentry, flags, error); nfs4_label_free(label); out: From acc29fb8f7921ac85828021cf884579957446d3b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 15:20:30 -0400 Subject: [PATCH 07/26] debugfs: ->d_parent is never NULL or negative Signed-off-by: Al Viro --- fs/debugfs/inode.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4bc1f68243c1..72361baf9da7 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -621,9 +621,6 @@ void debugfs_remove(struct dentry *dentry) return; parent = dentry->d_parent; - if (!parent || d_really_is_negative(parent)) - return; - inode_lock(d_inode(parent)); ret = __debugfs_remove(dentry, parent); inode_unlock(d_inode(parent)); @@ -654,10 +651,6 @@ void debugfs_remove_recursive(struct dentry *dentry) if (IS_ERR_OR_NULL(dentry)) return; - parent = dentry->d_parent; - if (!parent || d_really_is_negative(parent)) - return; - parent = dentry; down: inode_lock(d_inode(parent)); From 77d5a6b7d9924e2ebb96c6e80bb9fc78615e41b5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 15:29:26 -0400 Subject: [PATCH 08/26] 9p: new helper - v9fs_parent_fid() Signed-off-by: Al Viro --- fs/9p/fid.h | 4 ++++ fs/9p/vfs_inode.c | 10 +++++----- fs/9p/vfs_inode_dotl.c | 16 +++++----------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 2b6787fcb626..12700df0bb51 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -24,6 +24,10 @@ #include struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); +static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry) +{ + return v9fs_fid_lookup(dentry->d_parent); +} struct p9_fid *v9fs_fid_clone(struct dentry *dentry); void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); struct p9_fid *v9fs_writeback_fid(struct dentry *dentry); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4645c515262..cca44862f2cf 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -595,7 +595,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) v9ses = v9fs_inode2v9ses(dir); inode = d_inode(dentry); - dfid = v9fs_fid_lookup(dentry->d_parent); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { retval = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); @@ -653,7 +653,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, ofid = NULL; fid = NULL; name = (char *) dentry->d_name.name; - dfid = v9fs_fid_lookup(dentry->d_parent); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); @@ -798,7 +798,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, v9ses = v9fs_inode2v9ses(dir); /* We can walk d_parent because we hold the dir->i_mutex */ - dfid = v9fs_fid_lookup(dentry->d_parent); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) return ERR_CAST(dfid); @@ -975,13 +975,13 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ERR(oldfid)) return PTR_ERR(oldfid); - olddirfid = v9fs_fid_clone(old_dentry->d_parent); + olddirfid = v9fs_parent_fid(old_dentry); if (IS_ERR(olddirfid)) { retval = PTR_ERR(olddirfid); goto done; } - newdirfid = v9fs_fid_clone(new_dentry->d_parent); + newdirfid = v9fs_parent_fid(new_dentry); if (IS_ERR(newdirfid)) { retval = PTR_ERR(newdirfid); goto clunk_olddir; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a34702c998f5..7432e1615311 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -273,7 +273,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", name, flags, omode); - dfid = v9fs_fid_lookup(dentry->d_parent); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); @@ -389,7 +389,6 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, umode_t mode; struct inode *inode; struct p9_qid qid; - struct dentry *dir_dentry; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); @@ -400,8 +399,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, if (dir->i_mode & S_ISGID) omode |= S_ISGID; - dir_dentry = dentry->d_parent; - dfid = v9fs_fid_lookup(dir_dentry); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); @@ -691,7 +689,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname); v9ses = v9fs_inode2v9ses(dir); - dfid = v9fs_fid_lookup(dentry->d_parent); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); @@ -762,7 +760,6 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int err; - struct dentry *dir_dentry; struct p9_fid *dfid, *oldfid; struct v9fs_session_info *v9ses; @@ -770,8 +767,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, dir->i_ino, old_dentry, dentry); v9ses = v9fs_inode2v9ses(dir); - dir_dentry = dentry->d_parent; - dfid = v9fs_fid_lookup(dir_dentry); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) return PTR_ERR(dfid); @@ -822,7 +818,6 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; struct p9_qid qid; - struct dentry *dir_dentry; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n", @@ -830,8 +825,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, MAJOR(rdev), MINOR(rdev)); v9ses = v9fs_inode2v9ses(dir); - dir_dentry = dentry->d_parent; - dfid = v9fs_fid_lookup(dir_dentry); + dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); From affda48410a5bbfd516def60bbc97f2683cd9f7b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 18:35:12 -0400 Subject: [PATCH 09/26] trim fsnotify hooks a bit fsnotify_d_move()/__fsnotify_d_instantiate()/__fsnotify_update_dcache_flags() are identical to each other, regardless of the config. Signed-off-by: Al Viro --- fs/dcache.c | 8 ++++---- include/linux/fsnotify.h | 12 ------------ include/linux/fsnotify_backend.h | 20 +++----------------- 3 files changed, 7 insertions(+), 33 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index ad4a542e9bab..f9c63c108881 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1769,7 +1769,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); - __fsnotify_d_instantiate(dentry); + fsnotify_update_flags(dentry); spin_unlock(&dentry->d_lock); } @@ -2563,7 +2563,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); - __fsnotify_d_instantiate(dentry); + fsnotify_update_flags(dentry); } _d_rehash(dentry); if (dir) @@ -2853,8 +2853,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target, list_move(&target->d_child, &target->d_parent->d_subdirs); list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); if (exchange) - fsnotify_d_move(target); - fsnotify_d_move(dentry); + fsnotify_update_flags(target); + fsnotify_update_flags(dentry); } write_seqcount_end(&target->d_seq); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 0141f257d67b..eed9e853a06f 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -51,18 +51,6 @@ static inline int fsnotify_perm(struct file *file, int mask) return fsnotify(inode, fsnotify_mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } -/* - * fsnotify_d_move - dentry has been moved - */ -static inline void fsnotify_d_move(struct dentry *dentry) -{ - /* - * On move we need to update dentry->d_flags to indicate if the new parent - * cares about events from this dentry. - */ - __fsnotify_update_dcache_flags(dentry); -} - /* * fsnotify_link_count - inode's link count changed */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d188a107c562..58205f33af02 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -267,10 +267,8 @@ static inline int fsnotify_inode_watches_children(struct inode *inode) * Update the dentry with a flag indicating the interest of its parent to receive * filesystem events when those events happens to this dentry->d_inode. */ -static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) +static inline void fsnotify_update_flags(struct dentry *dentry) { - struct dentry *parent; - assert_spin_locked(&dentry->d_lock); /* @@ -280,21 +278,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) * find our entry, so it will spin until we complete here, and update * us with the new state. */ - parent = dentry->d_parent; - if (fsnotify_inode_watches_children(parent->d_inode)) + if (fsnotify_inode_watches_children(dentry->d_parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; } -/* - * fsnotify_d_instantiate - instantiate a dentry for inode - */ -static inline void __fsnotify_d_instantiate(struct dentry *dentry) -{ - __fsnotify_update_dcache_flags(dentry); -} - /* called from fsnotify listeners, such as fanotify or dnotify */ /* create a new group */ @@ -386,10 +375,7 @@ static inline void __fsnotify_inode_delete(struct inode *inode) static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) {} -static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) -{} - -static inline void __fsnotify_d_instantiate(struct dentry *dentry) +static inline void fsnotify_update_flags(struct dentry *dentry) {} static inline u32 fsnotify_get_cookie(void) From 93c76a3d437ff71fabe1d190a9f00e92cec7a621 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Dec 2015 23:45:44 -0500 Subject: [PATCH 10/26] file_inode(f)->i_mapping is f->f_mapping Signed-off-by: Al Viro --- drivers/gpu/drm/armada/armada_gem.c | 4 ++-- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 10 +++++----- drivers/gpu/drm/omapdrm/omap_gem.c | 2 +- drivers/gpu/drm/ttm/ttm_tt.c | 4 ++-- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/nfs/dir.c | 2 +- fs/ocfs2/aops.c | 2 +- mm/hugetlb.c | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c index 88e7fc797721..cb8f0347b934 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c @@ -231,7 +231,7 @@ struct armada_gem_object *armada_gem_alloc_object(struct drm_device *dev, obj->dev_addr = DMA_ERROR_CODE; - mapping = file_inode(obj->obj.filp)->i_mapping; + mapping = obj->obj.filp->f_mapping; mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); DRM_DEBUG_DRIVER("alloc obj %p size %zu\n", obj, size); @@ -441,7 +441,7 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach, if (sg_alloc_table(sgt, count, GFP_KERNEL)) goto free_sgt; - mapping = file_inode(dobj->obj.filp)->i_mapping; + mapping = dobj->obj.filp->f_mapping; for_each_sg(sgt->sgl, sg, count, i) { struct page *page; diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 32156060b9c9..ad89db36ca25 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -511,7 +511,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) int i, npages; /* This is the shared memory object that backs the GEM resource */ - mapping = file_inode(obj->filp)->i_mapping; + mapping = obj->filp->f_mapping; /* We already BUG_ON() for non-page-aligned sizes in * drm_gem_object_init(), so we should never hit this unless diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index df9bcbab922f..8c6f750634af 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -660,7 +660,7 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, * why this is required _and_ expected if you're * going to pin these pages. */ - mapping = file_inode(obj->filp)->i_mapping; + mapping = obj->filp->f_mapping; mapping_set_gfp_mask(mapping, GFP_HIGHUSER); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index aad26851cee3..ed6117a0ee84 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -151,7 +151,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { - struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; + struct address_space *mapping = obj->base.filp->f_mapping; char *vaddr = obj->phys_handle->vaddr; struct sg_table *st; struct scatterlist *sg; @@ -218,7 +218,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) obj->dirty = 0; if (obj->dirty) { - struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; + struct address_space *mapping = obj->base.filp->f_mapping; char *vaddr = obj->phys_handle->vaddr; int i; @@ -2155,7 +2155,7 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj) if (obj->base.filp == NULL) return; - mapping = file_inode(obj->base.filp)->i_mapping, + mapping = obj->base.filp->f_mapping, invalidate_mapping_pages(mapping, 0, (loff_t)-1); } @@ -2271,7 +2271,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) * * Fail silently without starting the shrinker */ - mapping = file_inode(obj->base.filp)->i_mapping; + mapping = obj->base.filp->f_mapping; gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); gfp |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; @@ -4522,7 +4522,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, mask |= __GFP_DMA32; } - mapping = file_inode(obj->base.filp)->i_mapping; + mapping = obj->base.filp->f_mapping; mapping_set_gfp_mask(mapping, mask); i915_gem_object_init(obj, &i915_gem_object_ops); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index b97afc281778..dfb36f3c9c35 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1406,7 +1406,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, if (ret) goto err_free; - mapping = file_inode(obj->filp)->i_mapping; + mapping = obj->filp->f_mapping; mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32); } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 077ae9b2865d..97542c35d6ef 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -298,7 +298,7 @@ int ttm_tt_swapin(struct ttm_tt *ttm) swap_storage = ttm->swap_storage; BUG_ON(swap_storage == NULL); - swap_space = file_inode(swap_storage)->i_mapping; + swap_space = swap_storage->f_mapping; for (i = 0; i < ttm->num_pages; ++i) { from_page = shmem_read_mapping_page(swap_space, i); @@ -347,7 +347,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage) } else swap_storage = persistent_swap_storage; - swap_space = file_inode(swap_storage)->i_mapping; + swap_space = swap_storage->f_mapping; for (i = 0; i < ttm->num_pages; ++i) { from_page = ttm->pages[i]; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 8eed66af5b82..02a3845363f7 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -128,7 +128,7 @@ static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - struct inode *inode = file_inode(file)->i_mapping->host; + struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); ssize_t ret; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index ef9fefe364a6..19462d773fe2 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -126,7 +126,7 @@ static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - struct inode *inode = file_inode(file)->i_mapping->host; + struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); ssize_t ret; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8be0a2438a93..59efb6e46a5e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -729,7 +729,7 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) struct page *page; for (;;) { - page = read_cache_page(file_inode(desc->file)->i_mapping, + page = read_cache_page(desc->file->f_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page) || grab_page(page)) break; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c034edf3ef38..ab8e56c6f2e1 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2426,7 +2426,7 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - struct inode *inode = file_inode(file)->i_mapping->host; + struct inode *inode = file->f_mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); get_block_t *get_block; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d26162e81fea..b322c85c58c3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3287,7 +3287,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, address = address & huge_page_mask(h); pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - mapping = file_inode(vma->vm_file)->i_mapping; + mapping = vma->vm_file->f_mapping; /* * Take the mapping lock for the duration of the table walk. As From 90090ae645f6affba0d4c8413749df96dde0a526 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 19:05:03 -0400 Subject: [PATCH 11/26] missed comment updates from ->direct_IO() prototype change Signed-off-by: Al Viro --- fs/9p/vfs_addr.c | 1 - fs/nfs/direct.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index c37fb9c08970..6181ad79e1a5 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -231,7 +231,6 @@ static int v9fs_launder_page(struct page *page) /** * v9fs_direct_IO - 9P address space operation for direct I/O * @iocb: target I/O control block - * @pos: offset in file to begin the operation * * The presence of v9fs_direct_IO() in the address space ops vector * allowes open() O_DIRECT flags which would have failed otherwise. diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 979b3c4dee6a..f0cbe028570f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -244,9 +244,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, /** * nfs_direct_IO - NFS address space operation for direct I/O * @iocb: target I/O control block - * @iov: array of vectors that define I/O buffer - * @pos: offset in file to begin the operation - * @nr_segs: size of iovec array + * @iter: I/O buffer * * The presence of this routine in the address space ops vector means * the NFS client supports direct I/O. However, for most direct IO, we From 6f3fc1070be028170b0f4c0c326480c9fcd1da3e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 May 2016 18:46:32 -0400 Subject: [PATCH 12/26] orangefs: constify inode_operations Signed-off-by: Al Viro --- fs/orangefs/inode.c | 2 +- fs/orangefs/namei.c | 2 +- fs/orangefs/orangefs-kernel.h | 6 +++--- fs/orangefs/symlink.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 85640e955cde..0f586bded7f4 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -294,7 +294,7 @@ int orangefs_permission(struct inode *inode, int mask) } /* ORANGEDS2 implementation of VFS inode operations for files */ -struct inode_operations orangefs_file_inode_operations = { +const struct inode_operations orangefs_file_inode_operations = { .get_acl = orangefs_get_acl, .set_acl = orangefs_set_acl, .setattr = orangefs_setattr, diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 930894ae7e1d..7e8dfa97c44a 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -438,7 +438,7 @@ static int orangefs_rename(struct inode *old_dir, } /* ORANGEFS implementation of VFS inode operations for directories */ -struct inode_operations orangefs_dir_inode_operations = { +const struct inode_operations orangefs_dir_inode_operations = { .lookup = orangefs_lookup, .get_acl = orangefs_get_acl, .set_acl = orangefs_set_acl, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 2281882f718e..db78ddde0236 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -570,10 +570,10 @@ extern int hash_table_size; extern const struct address_space_operations orangefs_address_operations; extern struct backing_dev_info orangefs_backing_dev_info; -extern struct inode_operations orangefs_file_inode_operations; +extern const struct inode_operations orangefs_file_inode_operations; extern const struct file_operations orangefs_file_operations; -extern struct inode_operations orangefs_symlink_inode_operations; -extern struct inode_operations orangefs_dir_inode_operations; +extern const struct inode_operations orangefs_symlink_inode_operations; +extern const struct inode_operations orangefs_dir_inode_operations; extern const struct file_operations orangefs_dir_operations; extern const struct dentry_operations orangefs_dentry_operations; extern const struct file_operations orangefs_devreq_file_operations; diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c index 6418dd638680..8fecf823f5ba 100644 --- a/fs/orangefs/symlink.c +++ b/fs/orangefs/symlink.c @@ -8,7 +8,7 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -struct inode_operations orangefs_symlink_inode_operations = { +const struct inode_operations orangefs_symlink_inode_operations = { .readlink = generic_readlink, .get_link = simple_get_link, .setattr = orangefs_setattr, From e0d508f1092db9a56cc6ed617f473a2704f7f683 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2016 21:22:04 -0400 Subject: [PATCH 13/26] ufs: get rid of redundant checks ufs_check_page() makes sure there's no entries with zero ->reclen Signed-off-by: Al Viro --- fs/ufs/dir.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 57dcceda17d6..fa3bda1a860f 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -279,12 +279,6 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr, de = (struct ufs_dir_entry *) kaddr; kaddr += ufs_last_byte(dir, n) - reclen; while ((char *) de <= kaddr) { - if (de->d_reclen == 0) { - ufs_error(dir->i_sb, __func__, - "zero-length directory entry"); - ufs_put_page(page); - goto out; - } if (ufs_match(sb, namelen, name, de)) goto found; de = ufs_next_entry(sb, de); @@ -414,11 +408,8 @@ ufs_validate_entry(struct super_block *sb, char *base, { struct ufs_dir_entry *de = (struct ufs_dir_entry*)(base + offset); struct ufs_dir_entry *p = (struct ufs_dir_entry*)(base + (offset&mask)); - while ((char*)p < (char*)de) { - if (p->d_reclen == 0) - break; + while ((char*)p < (char*)de) p = ufs_next_entry(sb, p); - } return (char *)p - base; } @@ -469,12 +460,6 @@ ufs_readdir(struct file *file, struct dir_context *ctx) de = (struct ufs_dir_entry *)(kaddr+offset); limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1); for ( ;(char*)de <= limit; de = ufs_next_entry(sb, de)) { - if (de->d_reclen == 0) { - ufs_error(sb, __func__, - "zero-length directory entry"); - ufs_put_page(page); - return -EIO; - } if (de->d_ino) { unsigned char d_type = DT_UNKNOWN; From 84c60b1388249a0167d5fe8160f84e66a1221ba8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2016 22:40:31 -0400 Subject: [PATCH 14/26] drop redundant ->owner initializations it's not needed for file_operations of inodes located on fs defined in the hosting module and for file_operations that go into procfs. Signed-off-by: Al Viro --- fs/cachefiles/proc.c | 1 - fs/cifs/cifs_debug.c | 7 ------- fs/coda/pioctl.c | 1 - fs/ext4/mballoc.c | 1 - fs/ext4/sysfs.c | 1 - fs/f2fs/super.c | 1 - fs/fscache/histogram.c | 1 - fs/fscache/object-list.c | 1 - fs/fscache/stats.c | 1 - fs/jfs/jfs_debug.c | 1 - fs/jfs/jfs_logmgr.c | 1 - fs/jfs/jfs_metapage.c | 1 - fs/jfs/jfs_txnmgr.c | 2 -- fs/jfs/jfs_xtree.c | 1 - fs/lockd/procfs.c | 1 - fs/nfs/client.c | 2 -- fs/nfsd/nfsctl.c | 3 --- fs/nfsd/stats.c | 1 - fs/pstore/inode.c | 1 - fs/xfs/xfs_stats.c | 1 - 20 files changed, 30 deletions(-) diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c index eccd33941199..125b90f6c796 100644 --- a/fs/cachefiles/proc.c +++ b/fs/cachefiles/proc.c @@ -93,7 +93,6 @@ static int cachefiles_histogram_open(struct inode *inode, struct file *file) } static const struct file_operations cachefiles_histogram_fops = { - .owner = THIS_MODULE, .open = cachefiles_histogram_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 788e19195991..6c58e13fed2f 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -244,7 +244,6 @@ static int cifs_debug_data_proc_open(struct inode *inode, struct file *file) } static const struct file_operations cifs_debug_data_proc_fops = { - .owner = THIS_MODULE, .open = cifs_debug_data_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -361,7 +360,6 @@ static int cifs_stats_proc_open(struct inode *inode, struct file *file) } static const struct file_operations cifs_stats_proc_fops = { - .owner = THIS_MODULE, .open = cifs_stats_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -447,7 +445,6 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer, } static const struct file_operations cifsFYI_proc_fops = { - .owner = THIS_MODULE, .open = cifsFYI_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -479,7 +476,6 @@ static ssize_t cifs_linux_ext_proc_write(struct file *file, } static const struct file_operations cifs_linux_ext_proc_fops = { - .owner = THIS_MODULE, .open = cifs_linux_ext_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -511,7 +507,6 @@ static ssize_t cifs_lookup_cache_proc_write(struct file *file, } static const struct file_operations cifs_lookup_cache_proc_fops = { - .owner = THIS_MODULE, .open = cifs_lookup_cache_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -543,7 +538,6 @@ static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer, } static const struct file_operations traceSMB_proc_fops = { - .owner = THIS_MODULE, .open = traceSMB_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -655,7 +649,6 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, } static const struct file_operations cifs_security_flags_proc_fops = { - .owner = THIS_MODULE, .open = cifs_security_flags_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index f36a4040afb8..b0b9cda41928 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -35,7 +35,6 @@ const struct inode_operations coda_ioctl_inode_operations = { }; const struct file_operations coda_ioctl_operations = { - .owner = THIS_MODULE, .unlocked_ioctl = coda_pioctl, .llseek = noop_llseek, }; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c1ab3ec30423..57c9bf5356d1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2350,7 +2350,6 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) } const struct file_operations ext4_seq_mb_groups_fops = { - .owner = THIS_MODULE, .open = ext4_mb_seq_groups_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 1420a3c614af..73bcfd41f5f2 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -359,7 +359,6 @@ static int name##_open(struct inode *inode, struct file *file) \ } \ \ static const struct file_operations ext4_seq_##name##_fops = { \ - .owner = THIS_MODULE, \ .open = name##_open, \ .read = seq_read, \ .llseek = seq_lseek, \ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 74cc8520b8b1..da58b9671d1e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -866,7 +866,6 @@ static int _name##_open_fs(struct inode *inode, struct file *file) \ } \ \ static const struct file_operations f2fs_seq_##_name##_fops = { \ - .owner = THIS_MODULE, \ .open = _name##_open_fs, \ .read = seq_read, \ .llseek = seq_lseek, \ diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c index 7d637e2335fd..15a3d042247e 100644 --- a/fs/fscache/histogram.c +++ b/fs/fscache/histogram.c @@ -99,7 +99,6 @@ static int fscache_histogram_open(struct inode *inode, struct file *file) } const struct file_operations fscache_histogram_fops = { - .owner = THIS_MODULE, .open = fscache_histogram_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 6b028b7c4250..5d5ddaa84b21 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -404,7 +404,6 @@ static int fscache_objlist_release(struct inode *inode, struct file *file) } const struct file_operations fscache_objlist_fops = { - .owner = THIS_MODULE, .open = fscache_objlist_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 7cfa0aacdf6d..7ac6e839b065 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -295,7 +295,6 @@ static int fscache_stats_open(struct inode *inode, struct file *file) } const struct file_operations fscache_stats_fops = { - .owner = THIS_MODULE, .open = fscache_stats_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index dd824d9b0b1a..a37eb5f8cbc0 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -58,7 +58,6 @@ static ssize_t jfs_loglevel_proc_write(struct file *file, } static const struct file_operations jfs_loglevel_proc_fops = { - .owner = THIS_MODULE, .open = jfs_loglevel_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 63759d723920..45416f6611cf 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2515,7 +2515,6 @@ static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) } const struct file_operations jfs_lmstats_proc_fops = { - .owner = THIS_MODULE, .open = jfs_lmstats_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index b60e015cc757..ba7734ae9d4c 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -828,7 +828,6 @@ static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) } const struct file_operations jfs_mpstat_proc_fops = { - .owner = THIS_MODULE, .open = jfs_mpstat_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index eddf2b6eda85..2e58978d6f45 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -3040,7 +3040,6 @@ static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) } const struct file_operations jfs_txanchor_proc_fops = { - .owner = THIS_MODULE, .open = jfs_txanchor_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -3081,7 +3080,6 @@ static int jfs_txstats_proc_open(struct inode *inode, struct file *file) } const struct file_operations jfs_txstats_proc_fops = { - .owner = THIS_MODULE, .open = jfs_txstats_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 5ad7748860ce..5cde6d2fcfca 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -3894,7 +3894,6 @@ static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) } const struct file_operations jfs_xtstat_proc_fops = { - .owner = THIS_MODULE, .open = jfs_xtstat_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/lockd/procfs.c b/fs/lockd/procfs.c index 2a0a98480e39..8f72cb237ef3 100644 --- a/fs/lockd/procfs.c +++ b/fs/lockd/procfs.c @@ -64,7 +64,6 @@ static const struct file_operations lockd_end_grace_operations = { .read = nlm_end_grace_read, .llseek = default_llseek, .release = simple_transaction_release, - .owner = THIS_MODULE, }; int __init diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0c96528db94a..487c5607d52f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1102,7 +1102,6 @@ static const struct file_operations nfs_server_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release_net, - .owner = THIS_MODULE, }; static int nfs_volume_list_open(struct inode *inode, struct file *file); @@ -1123,7 +1122,6 @@ static const struct file_operations nfs_volume_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release_net, - .owner = THIS_MODULE, }; /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9690cb4dd588..e7787777620e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -158,7 +158,6 @@ static const struct file_operations exports_proc_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, - .owner = THIS_MODULE, }; static int exports_nfsd_open(struct inode *inode, struct file *file) @@ -171,7 +170,6 @@ static const struct file_operations exports_nfsd_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, - .owner = THIS_MODULE, }; static int export_features_show(struct seq_file *m, void *v) @@ -217,7 +215,6 @@ static const struct file_operations pool_stats_operations = { .read = seq_read, .llseek = seq_lseek, .release = nfsd_pool_stats_release, - .owner = THIS_MODULE, }; static struct file_operations reply_cache_stats_operations = { diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index cd90878a76aa..d97338bb6a39 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -84,7 +84,6 @@ static int nfsd_proc_open(struct inode *inode, struct file *file) } static const struct file_operations nfsd_proc_fops = { - .owner = THIS_MODULE, .open = nfsd_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 45d6110744cb..ec9ddef5ae75 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -178,7 +178,6 @@ static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence) } static const struct file_operations pstore_file_operations = { - .owner = THIS_MODULE, .open = pstore_file_open, .read = pstore_file_read, .llseek = pstore_file_llseek, diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 8686df6c7609..d266e835ecc3 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -128,7 +128,6 @@ static int xqm_proc_open(struct inode *inode, struct file *file) } static const struct file_operations xqm_proc_fops = { - .owner = THIS_MODULE, .open = xqm_proc_open, .read = seq_read, .llseek = seq_lseek, From ea7d4c046ba6e2c6135c98721f4f09efd1adaabc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 19:14:03 -0400 Subject: [PATCH 15/26] binfmt_misc: ->s_root is not going anywhere ... no need to dget/dput it. Signed-off-by: Al Viro --- fs/binfmt_misc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 3a3ced779fc7..5417516f6e59 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -637,13 +637,12 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, break; case 3: /* Delete this handler. */ - root = dget(file->f_path.dentry->d_sb->s_root); + root = file_inode(file)->i_sb->s_root; inode_lock(d_inode(root)); kill_node(e); inode_unlock(d_inode(root)); - dput(root); break; default: return res; @@ -665,8 +664,8 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, { Node *e; struct inode *inode; - struct dentry *root, *dentry; - struct super_block *sb = file->f_path.dentry->d_sb; + struct super_block *sb = file_inode(file)->i_sb; + struct dentry *root = sb->s_root, *dentry; int err = 0; e = create_entry(buffer, count); @@ -674,7 +673,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, if (IS_ERR(e)) return PTR_ERR(e); - root = dget(sb->s_root); inode_lock(d_inode(root)); dentry = lookup_one_len(e->name, root, strlen(e->name)); err = PTR_ERR(dentry); @@ -712,7 +710,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, dput(dentry); out: inode_unlock(d_inode(root)); - dput(root); if (err) { kfree(e); @@ -753,14 +750,13 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer, break; case 3: /* Delete all handlers. */ - root = dget(file->f_path.dentry->d_sb->s_root); + root = file_inode(file)->i_sb->s_root; inode_lock(d_inode(root)); while (!list_empty(&entries)) kill_node(list_entry(entries.next, Node, list)); inode_unlock(d_inode(root)); - dput(root); break; default: return res; From 550dce01dd606c88a837138aa448ccd367fb0cbb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 May 2016 20:13:30 -0400 Subject: [PATCH 16/26] unify dentry_iput() and dentry_unlink_inode() There is a lot of duplication between dentry_unlink_inode() and dentry_iput(). The only real difference is that dentry_unlink_inode() bumps ->d_seq and dentry_iput() doesn't. The argument of the latter is known to have been unhashed, so anybody who might've found it in RCU lookup would already be doomed to a ->d_seq mismatch. And we want to avoid pointless smp_rmb() there. This patch makes dentry_unlink_inode() bump ->d_seq only for hashed dentries. It's safe (d_delete() calls that sucker only if we are holding the only reference to dentry, so rehash is not going to happen) and it allows to use dentry_unlink_inode() in __dentry_kill() and get rid of dentry_iput(). The interesting question here is profiling; it *is* a hot path, and extra conditional jumps in there might or might not be painful. Signed-off-by: Al Viro --- fs/dcache.c | 45 ++++++++++----------------------------------- 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index f9c63c108881..fe7cde2f2fe5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -335,44 +335,21 @@ static inline void dentry_rcuwalk_invalidate(struct dentry *dentry) /* * Release the dentry's inode, using the filesystem - * d_iput() operation if defined. Dentry has no refcount - * and is unhashed. - */ -static void dentry_iput(struct dentry * dentry) - __releases(dentry->d_lock) - __releases(dentry->d_inode->i_lock) -{ - struct inode *inode = dentry->d_inode; - if (inode) { - __d_clear_type_and_inode(dentry); - hlist_del_init(&dentry->d_u.d_alias); - spin_unlock(&dentry->d_lock); - spin_unlock(&inode->i_lock); - if (!inode->i_nlink) - fsnotify_inoderemove(inode); - if (dentry->d_op && dentry->d_op->d_iput) - dentry->d_op->d_iput(dentry, inode); - else - iput(inode); - } else { - spin_unlock(&dentry->d_lock); - } -} - -/* - * Release the dentry's inode, using the filesystem - * d_iput() operation if defined. dentry remains in-use. + * d_iput() operation if defined. */ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_lock) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + bool hashed = !d_unhashed(dentry); - raw_write_seqcount_begin(&dentry->d_seq); + if (hashed) + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - raw_write_seqcount_end(&dentry->d_seq); + if (hashed) + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -540,12 +517,10 @@ static void __dentry_kill(struct dentry *dentry) dentry->d_flags |= DCACHE_DENTRY_KILLED; if (parent) spin_unlock(&parent->d_lock); - dentry_iput(dentry); - /* - * dentry_iput drops the locks, at which point nobody (except - * transient RCU lookups) can reach this dentry. - */ - BUG_ON(dentry->d_lockref.count > 0); + if (dentry->d_inode) + dentry_unlink_inode(dentry); + else + spin_unlock(&dentry->d_lock); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); From 2d902671ce1cd98cdc88d78c481889a1b2996101 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Jun 2016 08:53:27 +0200 Subject: [PATCH 17/26] vfs: merge .d_select_inode() into .d_real() The two methods essentially do the same: find the real dentry/inode belonging to an overlay dentry. The difference is in the usage: vfs_open() uses ->d_select_inode() and expects the function to perform copy-up if necessary based on the open flags argument. file_dentry() uses ->d_real() passing in the overlay dentry as well as the underlying inode. vfs_rename() uses ->d_select_inode() but passes zero flags. ->d_real() with a zero inode would have worked just as well here. This patch merges the functionality of ->d_select_inode() into ->d_real() by adding an 'open_flags' argument to the latter. [Al Viro] Make the signature of d_real() match that of ->d_real() again. And constify the inode argument, while we are at it. Signed-off-by: Miklos Szeredi --- fs/dcache.c | 3 --- fs/namei.c | 2 +- fs/open.c | 8 ++++---- fs/overlayfs/inode.c | 31 ++++++++++--------------------- fs/overlayfs/overlayfs.h | 2 +- fs/overlayfs/super.c | 20 ++++++++++++++------ include/linux/dcache.h | 28 ++++++++-------------------- include/linux/fs.h | 7 +------ 8 files changed, 39 insertions(+), 62 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index d6847d7b123d..5405b89fe8ec 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1729,7 +1729,6 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | - DCACHE_OP_SELECT_INODE | DCACHE_OP_REAL)); dentry->d_op = op; if (!op) @@ -1746,8 +1745,6 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; - if (op->d_select_inode) - dentry->d_flags |= DCACHE_OP_SELECT_INODE; if (op->d_real) dentry->d_flags |= DCACHE_OP_REAL; diff --git a/fs/namei.c b/fs/namei.c index 70580ab1445c..bb7a2e0b959c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4328,7 +4328,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, * Check source == target. * On overlayfs need to look at underlying inodes. */ - if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0)) + if (d_real_inode(old_dentry) == d_real_inode(new_dentry)) return 0; error = may_delete(old_dir, old_dentry, is_dir); diff --git a/fs/open.c b/fs/open.c index 93ae3cdee4ab..bf66cf1a9f5c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -840,13 +840,13 @@ EXPORT_SYMBOL(file_path); int vfs_open(const struct path *path, struct file *file, const struct cred *cred) { - struct inode *inode = vfs_select_inode(path->dentry, file->f_flags); + struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags); - if (IS_ERR(inode)) - return PTR_ERR(inode); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); file->f_path = *path; - return do_dentry_open(file, inode, NULL, cred); + return do_dentry_open(file, d_backing_inode(dentry), NULL, cred); } struct file *dentry_open(const struct path *path, int flags, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1dbeab6cf96e..e08cd94d7b26 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -325,36 +325,25 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, return true; } -struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) +int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) { - int err; + int err = 0; struct path realpath; enum ovl_path_type type; - if (d_is_dir(dentry)) - return d_backing_inode(dentry); - type = ovl_path_real(dentry, &realpath); if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); - if (err) - return ERR_PTR(err); - - if (file_flags & O_TRUNC) - err = ovl_copy_up_truncate(dentry); - else - err = ovl_copy_up(dentry); - ovl_drop_write(dentry); - if (err) - return ERR_PTR(err); - - ovl_path_upper(dentry, &realpath); + if (!err) { + if (file_flags & O_TRUNC) + err = ovl_copy_up_truncate(dentry); + else + err = ovl_copy_up(dentry); + ovl_drop_write(dentry); + } } - if (realpath.dentry->d_flags & DCACHE_OP_SELECT_INODE) - return realpath.dentry->d_op->d_select_inode(realpath.dentry, file_flags); - - return d_backing_inode(realpath.dentry); + return err; } static const struct inode_operations ovl_file_inode_operations = { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 4bd9b5ba8f42..6b9fd25c5ad4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -179,7 +179,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); int ovl_removexattr(struct dentry *dentry, const char *name); -struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags); +int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, struct ovl_entry *oe); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ce02f46029da..035c176edf00 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -304,7 +304,9 @@ static void ovl_dentry_release(struct dentry *dentry) } } -static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode) +static struct dentry *ovl_d_real(struct dentry *dentry, + const struct inode *inode, + unsigned int open_flags) { struct dentry *real; @@ -314,6 +316,16 @@ static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode) goto bug; } + if (d_is_negative(dentry)) + return dentry; + + if (open_flags) { + int err = ovl_open_maybe_copy_up(dentry, open_flags); + + if (err) + return ERR_PTR(err); + } + real = ovl_dentry_upper(dentry); if (real && (!inode || inode == d_inode(real))) return real; @@ -326,9 +338,7 @@ static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode) return real; /* Handle recursion */ - if (real->d_flags & DCACHE_OP_REAL) - return real->d_op->d_real(real, inode); - + return d_real(real, inode, open_flags); bug: WARN(1, "ovl_d_real(%pd4, %s:%lu\n): real dentry not found\n", dentry, inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); @@ -378,13 +388,11 @@ static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, - .d_select_inode = ovl_d_select_inode, .d_real = ovl_d_real, }; static const struct dentry_operations ovl_reval_dentry_operations = { .d_release = ovl_dentry_release, - .d_select_inode = ovl_d_select_inode, .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, .d_weak_revalidate = ovl_dentry_weak_revalidate, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f53fa055021a..45b22de15ede 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -139,8 +139,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); - struct inode *(*d_select_inode)(struct dentry *, unsigned); - struct dentry *(*d_real)(struct dentry *, struct inode *); + struct dentry *(*d_real)(struct dentry *, const struct inode *, unsigned int); } ____cacheline_aligned; /* @@ -206,10 +205,8 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ -#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ - -#define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */ -#define DCACHE_OP_REAL 0x08000000 +#define DCACHE_ENCRYPTED_WITH_KEY 0x02000000 /* dir is encrypted with a valid key */ +#define DCACHE_OP_REAL 0x04000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ #define DCACHE_DENTRY_CURSOR 0x20000000 @@ -557,25 +554,16 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) return upper; } -static inline struct dentry *d_real(struct dentry *dentry) +static inline struct dentry *d_real(struct dentry *dentry, + const struct inode *inode, + unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, NULL); + return dentry->d_op->d_real(dentry, inode, flags); else return dentry; } -static inline struct inode *vfs_select_inode(struct dentry *dentry, - unsigned open_flags) -{ - struct inode *inode = d_inode(dentry); - - if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE)) - inode = dentry->d_op->d_select_inode(dentry, open_flags); - - return inode; -} - /** * d_real_inode - Return the real inode * @dentry: The dentry to query @@ -585,7 +573,7 @@ static inline struct inode *vfs_select_inode(struct dentry *dentry, */ static inline struct inode *d_real_inode(struct dentry *dentry) { - return d_backing_inode(d_real(dentry)); + return d_backing_inode(d_real(dentry, NULL, 0)); } diff --git a/include/linux/fs.h b/include/linux/fs.h index dd288148a6b1..bacc0733663c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1272,12 +1272,7 @@ static inline struct inode *file_inode(const struct file *f) static inline struct dentry *file_dentry(const struct file *file) { - struct dentry *dentry = file->f_path.dentry; - - if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, file_inode(file)); - else - return dentry; + return d_real(file->f_path.dentry, file_inode(file), 0); } static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) From e698b8a43659f9ece192fcab215abcadac8f88d7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Jun 2016 08:53:27 +0200 Subject: [PATCH 18/26] vfs: document ->d_real() Add missing documentation for the d_op->d_real() method and d_real() helper. Signed-off-by: Miklos Szeredi --- Documentation/filesystems/Locking | 3 +++ Documentation/filesystems/vfs.txt | 20 ++++++++++++++++++++ include/linux/dcache.h | 14 +++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 75eea7ce3d7c..898d3cecd8a6 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -20,6 +20,8 @@ prototypes: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(struct dentry *, bool); + struct dentry *(*d_real)(struct dentry *, const struct inode *, + unsigned int); locking rules: rename_lock ->d_lock may block rcu-walk @@ -34,6 +36,7 @@ d_iput: no no yes no d_dname: no no no no d_automount: no no yes no d_manage: no no yes (ref-walk) maybe +d_real no no yes no --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c61a223ef3ff..1c934dd2c47d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -938,6 +938,8 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); + struct dentry *(*d_real)(struct dentry *, const struct inode *, + unsigned int); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -1060,6 +1062,24 @@ struct dentry_operations { This function is only used if DCACHE_MANAGE_TRANSIT is set on the dentry being transited from. + d_real: overlay/union type filesystems implement this method to return one of + the underlying dentries hidden by the overlay. It is used in three + different modes: + + Called from open it may need to copy-up the file depending on the + supplied open flags. This mode is selected with a non-zero flags + argument. In this mode the d_real method can return an error. + + Called from file_dentry() it returns the real dentry matching the inode + argument. The real dentry may be from a lower layer already copied up, + but still referenced from the file. This mode is selected with a + non-NULL inode argument. This will always succeed. + + With NULL inode and zero flags the topmost real underlying dentry is + returned. This will always succeed. + + This method is never called with both non-NULL inode and non-zero flags. + Example : static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 45b22de15ede..14df83609c7f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -139,7 +139,8 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *, unsigned int); + struct dentry *(*d_real)(struct dentry *, const struct inode *, + unsigned int); } ____cacheline_aligned; /* @@ -554,6 +555,17 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) return upper; } +/** + * d_real - Return the real dentry + * @dentry: the dentry to query + * @inode: inode to select the dentry from multiple layers (can be NULL) + * @flags: open flags to control copy-up behavior + * + * If dentry is on an union/overlay, then return the underlying, real dentry. + * Otherwise return the dentry itself. + * + * See also: Documentation/filesystems/vfs.txt + */ static inline struct dentry *d_real(struct dentry *dentry, const struct inode *inode, unsigned int flags) From 0cac643c102c0632dc2cc81e2490b0fec1cac0af Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Jun 2016 08:53:28 +0200 Subject: [PATCH 19/26] vfs: clean up documentation Signed-off-by: Miklos Szeredi --- Documentation/filesystems/Locking | 2 -- Documentation/filesystems/vfs.txt | 24 ++++++++---------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 898d3cecd8a6..d4def7b78a62 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -69,7 +69,6 @@ prototypes: struct file *, unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); locking rules: all may block @@ -98,7 +97,6 @@ fiemap: no update_time: no atomic_open: yes tmpfile: no -dentry_open: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 1c934dd2c47d..01c4f1741c20 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,7 +364,6 @@ struct inode_operations { int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); }; Again, all methods are called without any locks being held, unless @@ -696,13 +695,6 @@ struct address_space_operations { but instead uses bmap to find out where the blocks in the file are and uses those addresses directly. - dentry_open: *WARNING: probably going away soon, do not use!* This is an - alternative to f_op->open(), the difference is that this method may open - a file not necessarily originating from the same filesystem as the one - i_op->open() was called on. It may be useful for stacking filesystems - which want to allow native I/O directly on underlying files. - - invalidatepage: If a page has PagePrivate set, then invalidatepage will be called when part or all of the page is to be removed from the address space. This generally corresponds to either a @@ -1024,6 +1016,14 @@ struct dentry_operations { at the end of the buffer, and returns a pointer to the first char. dynamic_dname() helper function is provided to take care of this. + Example : + + static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) + { + return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", + dentry->d_inode->i_ino); + } + d_automount: called when an automount dentry is to be traversed (optional). This should create a new VFS mount record and return the record to the caller. The caller is supplied with a path parameter giving the @@ -1080,14 +1080,6 @@ struct dentry_operations { This method is never called with both non-NULL inode and non-zero flags. -Example : - -static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", - dentry->d_inode->i_ino); -} - Each dentry has a pointer to its parent dentry, as well as a hash list of child dentries. Child dentries are basically like files in a directory. From ae0a843c740b4e63684eefae96097cf62d9b7a14 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Sat, 26 Mar 2016 09:12:10 +0000 Subject: [PATCH 20/26] dentry_cmp(): use lockless_dereference() instead of smp_read_barrier_depends() lockless_dereference() was added which can be used in place of hard-coding smp_read_barrier_depends(). Signed-off-by: He Kuang Signed-off-by: Al Viro --- fs/dcache.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index e5c8ba76d426..dc37c0238b46 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -226,10 +226,9 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) { - const unsigned char *cs; /* * Be careful about RCU walk racing with rename: - * use ACCESS_ONCE to fetch the name pointer. + * use 'lockless_dereference' to fetch the name pointer. * * NOTE! Even if a rename will mean that the length * was not loaded atomically, we don't care. The @@ -243,8 +242,8 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c * early because the data cannot match (there can * be no NUL in the ct/tcount data) */ - cs = ACCESS_ONCE(dentry->d_name.name); - smp_read_barrier_depends(); + const unsigned char *cs = lockless_dereference(dentry->d_name.name); + return dentry_string_cmp(cs, ct, tcount); } From d4c91a8f7e5514a1e9cd37b453fda0dedfa8045d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 25 Jun 2016 23:33:49 -0400 Subject: [PATCH 21/26] new helper: d_same_name() Signed-off-by: Al Viro --- fs/dcache.c | 127 +++++++++++++++------------------------------------- 1 file changed, 36 insertions(+), 91 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index dc37c0238b46..040c2586d483 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2066,42 +2066,19 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, } EXPORT_SYMBOL(d_add_ci); -/* - * Do the slow-case of the dentry name compare. - * - * Unlike the dentry_cmp() function, we need to atomically - * load the name and length information, so that the - * filesystem can rely on them, and can use the 'name' and - * 'len' information without worrying about walking off the - * end of memory etc. - * - * Thus the read_seqcount_retry() and the "duplicate" info - * in arguments (the low-level filesystem should not look - * at the dentry inode or name contents directly, since - * rename can change them while we're in RCU mode). - */ -enum slow_d_compare { - D_COMP_OK, - D_COMP_NOMATCH, - D_COMP_SEQRETRY, -}; -static noinline enum slow_d_compare slow_dentry_cmp( - const struct dentry *parent, - struct dentry *dentry, - unsigned int seq, - const struct qstr *name) +static inline bool d_same_name(const struct dentry *dentry, + const struct dentry *parent, + const struct qstr *name) { - int tlen = dentry->d_name.len; - const char *tname = dentry->d_name.name; - - if (read_seqcount_retry(&dentry->d_seq, seq)) { - cpu_relax(); - return D_COMP_SEQRETRY; + if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) { + if (dentry->d_name.len != name->len) + return false; + return dentry_cmp(dentry, name->name, name->len) == 0; } - if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) - return D_COMP_NOMATCH; - return D_COMP_OK; + return parent->d_op->d_compare(parent, dentry, + dentry->d_name.len, dentry->d_name.name, + name) == 0; } /** @@ -2180,6 +2157,9 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, * dentry compare, we will do seqretries until it is stable, * and if we end up with a successful lookup, we actually * want to exit RCU lookup anyway. + * + * Note that raw_seqcount_begin still *does* smp_rmb(), so + * we are still guaranteed NUL-termination of ->d_name.name. */ seq = raw_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) @@ -2188,24 +2168,28 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, continue; if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { + int tlen; + const char *tname; if (dentry->d_name.hash != hashlen_hash(hashlen)) continue; - *seqp = seq; - switch (slow_dentry_cmp(parent, dentry, seq, name)) { - case D_COMP_OK: - return dentry; - case D_COMP_NOMATCH: - continue; - default: + tlen = dentry->d_name.len; + tname = dentry->d_name.name; + /* we want a consistent (name,len) pair */ + if (read_seqcount_retry(&dentry->d_seq, seq)) { + cpu_relax(); goto seqretry; } + if (parent->d_op->d_compare(parent, dentry, + tlen, tname, name) != 0) + continue; + } else { + if (dentry->d_name.hash_len != hashlen) + continue; + if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0) + continue; } - - if (dentry->d_name.hash_len != hashlen) - continue; *seqp = seq; - if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) - return dentry; + return dentry; } return NULL; } @@ -2253,9 +2237,7 @@ EXPORT_SYMBOL(d_lookup); */ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) { - unsigned int len = name->len; unsigned int hash = name->hash; - const unsigned char *str = name->name; struct hlist_bl_head *b = d_hash(parent, hash); struct hlist_bl_node *node; struct dentry *found = NULL; @@ -2294,21 +2276,8 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) if (d_unhashed(dentry)) goto next; - /* - * It is safe to compare names since d_move() cannot - * change the qstr (protected by d_lock). - */ - if (parent->d_flags & DCACHE_OP_COMPARE) { - int tlen = dentry->d_name.len; - const char *tname = dentry->d_name.name; - if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) - goto next; - } else { - if (dentry->d_name.len != len) - goto next; - if (dentry_cmp(dentry, str, len)) - goto next; - } + if (!d_same_name(dentry, parent, name)) + goto next; dentry->d_lockref.count++; found = dentry; @@ -2461,9 +2430,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, const struct qstr *name, wait_queue_head_t *wq) { - unsigned int len = name->len; unsigned int hash = name->hash; - const unsigned char *str = name->name; struct hlist_bl_head *b = in_lookup_hash(parent, hash); struct hlist_bl_node *node; struct dentry *new = d_alloc(parent, name); @@ -2514,17 +2481,8 @@ struct dentry *d_alloc_parallel(struct dentry *parent, continue; if (dentry->d_parent != parent) continue; - if (parent->d_flags & DCACHE_OP_COMPARE) { - int tlen = dentry->d_name.len; - const char *tname = dentry->d_name.name; - if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) - continue; - } else { - if (dentry->d_name.len != len) - continue; - if (dentry_cmp(dentry, str, len)) - continue; - } + if (!d_same_name(dentry, parent, name)) + continue; hlist_bl_unlock(b); /* now we can try to grab a reference */ if (!lockref_get_not_dead(&dentry->d_lockref)) { @@ -2551,17 +2509,8 @@ struct dentry *d_alloc_parallel(struct dentry *parent, goto mismatch; if (unlikely(d_unhashed(dentry))) goto mismatch; - if (parent->d_flags & DCACHE_OP_COMPARE) { - int tlen = dentry->d_name.len; - const char *tname = dentry->d_name.name; - if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) - goto mismatch; - } else { - if (unlikely(dentry->d_name.len != len)) - goto mismatch; - if (unlikely(dentry_cmp(dentry, str, len))) - goto mismatch; - } + if (unlikely(!d_same_name(dentry, parent, name))) + goto mismatch; /* OK, it *is* a hashed match; return it */ spin_unlock(&dentry->d_lock); dput(new); @@ -2657,8 +2606,6 @@ EXPORT_SYMBOL(d_add); struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) { struct dentry *alias; - int len = entry->d_name.len; - const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; spin_lock(&inode->i_lock); @@ -2672,9 +2619,7 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) continue; if (alias->d_parent != entry->d_parent) continue; - if (alias->d_name.len != len) - continue; - if (dentry_cmp(alias, name, len)) + if (!d_same_name(alias, entry->d_parent, &entry->d_name)) continue; spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { From f4e6d844bdc142322905d137a9e44e07eee43c5c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 6 Mar 2016 23:27:26 -0500 Subject: [PATCH 22/26] Remove last traces of ->sync_page Commit 7eaceaccab5f removed ->sync_page, but a few mentions of it still existed in documentation and comments, Signed-off-by: Matthew Wilcox Signed-off-by: Al Viro --- Documentation/block/biodoc.txt | 3 +-- Documentation/filesystems/Locking | 11 ++--------- Documentation/filesystems/vfs.txt | 8 +++----- fs/isofs/compress.c | 1 - fs/ntfs/inode.c | 2 +- 5 files changed, 7 insertions(+), 18 deletions(-) diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 5be8a7f4cc7f..026d13362aca 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -1024,8 +1024,7 @@ could be on demand. For example wait_on_buffer sets the unplugging going through sync_buffer() running blk_run_address_space(mapping). Or the caller can do it explicity through blk_unplug(bdev). So in the read case, the queue gets explicitly unplugged as part of waiting for completion on that -buffer. For page driven IO, the address space ->sync_page() takes care of -doing the blk_run_address_space(). +buffer. Aside: This is kind of controversial territory, as it's not clear if plugging is diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 75eea7ce3d7c..08086dc160d3 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -179,7 +179,6 @@ unlocks and drops the reference. prototypes: int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); - int (*sync_page)(struct page *); int (*writepages)(struct address_space *, struct writeback_control *); int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, @@ -208,7 +207,6 @@ locking rules: PageLocked(page) i_mutex writepage: yes, unlocks (see below) readpage: yes, unlocks -sync_page: maybe writepages: set_page_dirty no readpages: @@ -226,8 +224,8 @@ error_remove_page: yes swap_activate: no swap_deactivate: no - ->write_begin(), ->write_end(), ->sync_page() and ->readpage() -may be called from the request handler (/dev/loop). + ->write_begin(), ->write_end() and ->readpage() may be called from +the request handler (/dev/loop). ->readpage() unlocks the page, either synchronously or via I/O completion. @@ -283,11 +281,6 @@ will leave the page itself marked clean but it will be tagged as dirty in the radix tree. This incoherency can lead to all sorts of hard-to-debug problems in the filesystem like having dirty inodes at umount and losing written data. - ->sync_page() locking rules are not well-defined - usually it is called -with lock on page, but that is not guaranteed. Considering the currently -existing instances of this method ->sync_page() itself doesn't look -well-defined... - ->writepages() is used for periodic writeback and for syscall-initiated sync operations. The address_space should start I/O against at least *nr_to_write pages. *nr_to_write must be decremented for each page which is diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c61a223ef3ff..d4e07c00e18e 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -534,9 +534,7 @@ __sync_single_inode) to check if ->writepages has been successful in writing out the whole address_space. The Writeback tag is used by filemap*wait* and sync_page* functions, -via filemap_fdatawait_range, to wait for all writeback to -complete. While waiting ->sync_page (if defined) will be called on -each page that is found to require writeback. +via filemap_fdatawait_range, to wait for all writeback to complete. An address_space handler may attach extra information to a page, typically using the 'private' field in the 'struct page'. If such @@ -554,8 +552,8 @@ address_space has finer control of write sizes. The read process essentially only requires 'readpage'. The write process is more complicated and uses write_begin/write_end or -set_page_dirty to write data into the address_space, and writepage, -sync_page, and writepages to writeback data to storage. +set_page_dirty to write data into the address_space, and writepage +and writepages to writeback data to storage. Adding and removing pages to/from an address_space is protected by the inode's i_mutex. diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 2e4e834d1a98..22dce16a8db9 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -361,7 +361,6 @@ static int zisofs_readpage(struct file *file, struct page *page) const struct address_space_operations zisofs_aops = { .readpage = zisofs_readpage, - /* No sync_page operation supported? */ /* No bmap operation supported */ }; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index f40972d6df90..e01287c964a8 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1854,7 +1854,7 @@ int ntfs_read_inode_mount(struct inode *vi) /* Need this to sanity check attribute list references to $MFT. */ vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); - /* Provides readpage() and sync_page() for map_mft_record(). */ + /* Provides readpage() for map_mft_record(). */ vi->i_mapping->a_ops = &ntfs_mst_aops; ctx = ntfs_attr_get_search_ctx(ni, m); From a4a4f9439c73b921ef246368fc6f2d7c0281e99d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Jul 2016 13:16:52 -0400 Subject: [PATCH 23/26] bdev: get rid of ->bd_inodes Since 2006 we have ->i_bdev pinning bdev in question, so there's no way to get to bdev ->evict_inode() while there's an aliasing inode anywhere. In other words, the only place walking the list of aliases is guaranteed to do it only when the list is empty... Remove the detritus; it should've been done in "[PATCH] Fix a race condition between ->i_mapping and iput()", but nobody had noticed it back then. Signed-off-by: Al Viro --- fs/block_dev.c | 16 ++-------------- include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 71ccab1d22c6..73130dad30e4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -614,7 +614,6 @@ static void init_once(void *foo) memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); - INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); @@ -624,24 +623,13 @@ static void init_once(void *foo) mutex_init(&bdev->bd_fsfreeze_mutex); } -static inline void __bd_forget(struct inode *inode) -{ - list_del_init(&inode->i_devices); - inode->i_bdev = NULL; - inode->i_mapping = &inode->i_data; -} - static void bdev_evict_inode(struct inode *inode) { struct block_device *bdev = &BDEV_I(inode)->bdev; - struct list_head *p; truncate_inode_pages_final(&inode->i_data); invalidate_inode_buffers(inode); /* is it needed here? */ clear_inode(inode); spin_lock(&bdev_lock); - while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { - __bd_forget(list_entry(p, struct inode, i_devices)); - } list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); } @@ -805,7 +793,6 @@ static struct block_device *bd_acquire(struct inode *inode) bdgrab(bdev); inode->i_bdev = bdev; inode->i_mapping = bdev->bd_inode->i_mapping; - list_add(&inode->i_devices, &bdev->bd_inodes); } spin_unlock(&bdev_lock); } @@ -821,7 +808,8 @@ void bd_forget(struct inode *inode) spin_lock(&bdev_lock); if (!sb_is_blkdev_sb(inode->i_sb)) bdev = inode->i_bdev; - __bd_forget(inode); + inode->i_bdev = NULL; + inode->i_mapping = &inode->i_data; spin_unlock(&bdev_lock); if (bdev) diff --git a/include/linux/fs.h b/include/linux/fs.h index bacc0733663c..1878c8461622 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -459,7 +459,6 @@ struct block_device { struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ - struct list_head bd_inodes; void * bd_claiming; void * bd_holder; int bd_holders; From f4fdace94722cd4ca60bf72816de01ab911c45d8 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Thu, 7 Jul 2016 22:04:04 -0400 Subject: [PATCH 24/26] vfs: Update lookup_dcache() comment commit 6c51e513a3aa ("lookup_dcache(): lift d_alloc() into callers") removed the need_lookup argument from lookup_dcache(), but the comment was forgotten. Also it no longer allocates a new dentry if nothing was found. Signed-off-by: Oleg Drokin Signed-off-by: Al Viro --- fs/namei.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index bb7a2e0b959c..baab2c0d96ba 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1449,9 +1449,8 @@ static int follow_dotdot(struct nameidata *nd) } /* - * This looks up the name in dcache, possibly revalidates the old dentry and - * allocates a new one if not found or not valid. In the need_lookup argument - * returns whether i_op->lookup is necessary. + * This looks up the name in dcache and possibly revalidates the found dentry. + * NULL is returned if the dentry does not exist in the cache. */ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, From 285b102d3b745f3c2c110c9c327741d87e64aacc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Jun 2016 11:47:32 +0200 Subject: [PATCH 25/26] vfs: new d_init method Allow filesystem to initialize dentry at allocation time. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 ++ Documentation/filesystems/vfs.txt | 3 +++ fs/dcache.c | 11 +++++++++++ include/linux/dcache.h | 1 + 4 files changed, 17 insertions(+) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index a38da93865c2..794adbe55399 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -15,6 +15,7 @@ prototypes: int (*d_compare)(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(struct dentry *); + int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); @@ -30,6 +31,7 @@ d_weak_revalidate:no no yes no d_hash no no no maybe d_compare: yes no no maybe d_delete: no yes no no +d_init: no no yes no d_release: no no yes no d_prune: no yes no no d_iput: no no yes no diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 70a056fe51a3..15c3fa7c89cc 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -923,6 +923,7 @@ struct dentry_operations { int (*d_compare)(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); + int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); @@ -995,6 +996,8 @@ struct dentry_operations { always cache a reachable dentry. d_delete must be constant and idempotent. + d_init: called when a dentry is allocated + d_release: called when a dentry is really deallocated d_iput: called when a dentry loses its inode (just prior to its diff --git a/fs/dcache.c b/fs/dcache.c index d5beef01cdfc..6d60a764c848 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1569,6 +1569,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { struct dentry *dentry; char *dname; + int err; dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); if (!dentry) @@ -1627,6 +1628,16 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); + if (dentry->d_op && dentry->d_op->d_init) { + err = dentry->d_op->d_init(dentry); + if (err) { + if (dname_external(dentry)) + kfree(external_name(dentry)); + kmem_cache_free(dentry_cache, dentry); + return NULL; + } + } + this_cpu_inc(nr_dentry); return dentry; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 14df83609c7f..98044a8d1487 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -133,6 +133,7 @@ struct dentry_operations { int (*d_compare)(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); + int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_prune)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); From 47be61845c775643f1aa4d2a54343549f943c94c Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 6 Jul 2016 11:32:20 +0800 Subject: [PATCH 26/26] fs/dcache.c: avoid soft-lockup in dput() We triggered soft-lockup under stress test which open/access/write/close one file concurrently on more than five different CPUs: WARN: soft lockup - CPU#0 stuck for 11s! [who:30631] ... [] dput+0x100/0x298 [] terminate_walk+0x4c/0x60 [] path_lookupat+0x5cc/0x7a8 [] filename_lookup+0x38/0xf0 [] user_path_at_empty+0x78/0xd0 [] user_path_at+0x1c/0x28 [] SyS_faccessat+0xb4/0x230 ->d_lock trylock may failed many times because of concurrently operations, and dput() may execute a long time. Fix this by replacing cpu_relax() with cond_resched(). dput() used to be sleepable, so make it sleepable again should be safe. Cc: Signed-off-by: Wei Fang Signed-off-by: Al Viro --- fs/dcache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 6d60a764c848..f650a4fc5b7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -596,7 +596,6 @@ static struct dentry *dentry_kill(struct dentry *dentry) failed: spin_unlock(&dentry->d_lock); - cpu_relax(); return dentry; /* try again with same dentry */ } @@ -770,6 +769,8 @@ void dput(struct dentry *dentry) return; repeat: + might_sleep(); + rcu_read_lock(); if (likely(fast_dput(dentry))) { rcu_read_unlock(); @@ -803,8 +804,10 @@ void dput(struct dentry *dentry) kill_it: dentry = dentry_kill(dentry); - if (dentry) + if (dentry) { + cond_resched(); goto repeat; + } } EXPORT_SYMBOL(dput);