ceph: fix cap_snap and realm split
The cap_snap creation/queueing relies on both the current i_head_snapc _and_ the i_snap_realm pointers being correct, so that the new cap_snap can properly reference the old context and the new i_head_snapc can be updated to reference the new snaprealm's context.

To fix this, we:
- move inodes completely to the new (split) realm so that i_snap_realm is correct, and
- generate the new snapcs _before_ queueing the cap_snaps in ceph_update_snap_trace().

Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
parent
cfc0bf6640
commit
ae00d4f37f
3 changed files with 33 additions and 61 deletions
|
@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
|
|||
if (i_size < page_off + len)
|
||||
len = i_size - page_off;
|
||||
|
||||
dout("writepage %p page %p index %lu on %llu~%u\n",
|
||||
inode, page, page->index, page_off, len);
|
||||
dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
|
||||
inode, page, page->index, page_off, len, snapc);
|
||||
|
||||
writeback_stat = atomic_long_inc_return(&client->writeback_count);
|
||||
if (writeback_stat >
|
||||
|
|
|
@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
|
|||
INIT_LIST_HEAD(&realm->children);
|
||||
INIT_LIST_HEAD(&realm->child_item);
|
||||
INIT_LIST_HEAD(&realm->empty_item);
|
||||
INIT_LIST_HEAD(&realm->dirty_item);
|
||||
INIT_LIST_HEAD(&realm->inodes_with_caps);
|
||||
spin_lock_init(&realm->inodes_with_caps_lock);
|
||||
__insert_snap_realm(&mdsc->snap_realms, realm);
|
||||
|
@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
|
|||
struct ceph_snap_realm *realm;
|
||||
int invalidate = 0;
|
||||
int err = -ENOMEM;
|
||||
LIST_HEAD(dirty_realms);
|
||||
|
||||
dout("update_snap_trace deletion=%d\n", deletion);
|
||||
more:
|
||||
|
@ -626,24 +628,6 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
|
|||
}
|
||||
}
|
||||
|
||||
if (le64_to_cpu(ri->seq) > realm->seq) {
|
||||
dout("update_snap_trace updating %llx %p %lld -> %lld\n",
|
||||
realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
|
||||
/*
|
||||
* if the realm seq has changed, queue a cap_snap for every
|
||||
* inode with open caps. we do this _before_ we update
|
||||
* the realm info so that we prepare for writeback under the
|
||||
* _previous_ snap context.
|
||||
*
|
||||
* ...unless it's a snap deletion!
|
||||
*/
|
||||
if (!deletion)
|
||||
queue_realm_cap_snaps(realm);
|
||||
} else {
|
||||
dout("update_snap_trace %llx %p seq %lld unchanged\n",
|
||||
realm->ino, realm, realm->seq);
|
||||
}
|
||||
|
||||
/* ensure the parent is correct */
|
||||
err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
|
||||
if (err < 0)
|
||||
|
@ -651,6 +635,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
|
|||
invalidate += err;
|
||||
|
||||
if (le64_to_cpu(ri->seq) > realm->seq) {
|
||||
dout("update_snap_trace updating %llx %p %lld -> %lld\n",
|
||||
realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
|
||||
/* update realm parameters, snap lists */
|
||||
realm->seq = le64_to_cpu(ri->seq);
|
||||
realm->created = le64_to_cpu(ri->created);
|
||||
|
@ -668,9 +654,17 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
|
|||
if (err < 0)
|
||||
goto fail;
|
||||
|
||||
/* queue realm for cap_snap creation */
|
||||
list_add(&realm->dirty_item, &dirty_realms);
|
||||
|
||||
invalidate = 1;
|
||||
} else if (!realm->cached_context) {
|
||||
dout("update_snap_trace %llx %p seq %lld new\n",
|
||||
realm->ino, realm, realm->seq);
|
||||
invalidate = 1;
|
||||
} else {
|
||||
dout("update_snap_trace %llx %p seq %lld unchanged\n",
|
||||
realm->ino, realm, realm->seq);
|
||||
}
|
||||
|
||||
dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
|
||||
|
@ -683,6 +677,14 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
|
|||
if (invalidate)
|
||||
rebuild_snap_realms(realm);
|
||||
|
||||
/*
|
||||
* queue cap snaps _after_ we've built the new snap contexts,
|
||||
* so that i_head_snapc can be set appropriately.
|
||||
*/
|
||||
list_for_each_entry(realm, &dirty_realms, dirty_item) {
|
||||
queue_realm_cap_snaps(realm);
|
||||
}
|
||||
|
||||
__cleanup_empty_realms(mdsc);
|
||||
return 0;
|
||||
|
||||
|
@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
|
|||
};
|
||||
struct inode *inode = ceph_find_inode(sb, vino);
|
||||
struct ceph_inode_info *ci;
|
||||
struct ceph_snap_realm *oldrealm;
|
||||
|
||||
if (!inode)
|
||||
continue;
|
||||
|
@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
|
|||
dout(" will move %p to split realm %llx %p\n",
|
||||
inode, realm->ino, realm);
|
||||
/*
|
||||
* Remove the inode from the realm's inode
|
||||
* list, but don't add it to the new realm
|
||||
* yet. We don't want the cap_snap to be
|
||||
* queued (again) by ceph_update_snap_trace()
|
||||
* below. Queue it _now_, under the old context.
|
||||
* Move the inode to the new realm
|
||||
*/
|
||||
spin_lock(&realm->inodes_with_caps_lock);
|
||||
list_del_init(&ci->i_snap_realm_item);
|
||||
list_add(&ci->i_snap_realm_item,
|
||||
&realm->inodes_with_caps);
|
||||
oldrealm = ci->i_snap_realm;
|
||||
ci->i_snap_realm = realm;
|
||||
spin_unlock(&realm->inodes_with_caps_lock);
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
ceph_queue_cap_snap(ci);
|
||||
ceph_get_snap_realm(mdsc, realm);
|
||||
ceph_put_snap_realm(mdsc, oldrealm);
|
||||
|
||||
iput(inode);
|
||||
continue;
|
||||
|
@ -880,43 +884,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
|
|||
ceph_update_snap_trace(mdsc, p, e,
|
||||
op == CEPH_SNAP_OP_DESTROY);
|
||||
|
||||
if (op == CEPH_SNAP_OP_SPLIT) {
|
||||
/*
|
||||
* ok, _now_ add the inodes into the new realm.
|
||||
*/
|
||||
for (i = 0; i < num_split_inos; i++) {
|
||||
struct ceph_vino vino = {
|
||||
.ino = le64_to_cpu(split_inos[i]),
|
||||
.snap = CEPH_NOSNAP,
|
||||
};
|
||||
struct inode *inode = ceph_find_inode(sb, vino);
|
||||
struct ceph_inode_info *ci;
|
||||
|
||||
if (!inode)
|
||||
continue;
|
||||
ci = ceph_inode(inode);
|
||||
spin_lock(&inode->i_lock);
|
||||
if (list_empty(&ci->i_snap_realm_item)) {
|
||||
struct ceph_snap_realm *oldrealm =
|
||||
ci->i_snap_realm;
|
||||
|
||||
dout(" moving %p to split realm %llx %p\n",
|
||||
inode, realm->ino, realm);
|
||||
spin_lock(&realm->inodes_with_caps_lock);
|
||||
list_add(&ci->i_snap_realm_item,
|
||||
&realm->inodes_with_caps);
|
||||
ci->i_snap_realm = realm;
|
||||
spin_unlock(&realm->inodes_with_caps_lock);
|
||||
ceph_get_snap_realm(mdsc, realm);
|
||||
ceph_put_snap_realm(mdsc, oldrealm);
|
||||
}
|
||||
spin_unlock(&inode->i_lock);
|
||||
iput(inode);
|
||||
}
|
||||
|
||||
if (op == CEPH_SNAP_OP_SPLIT)
|
||||
/* we took a reference when we created the realm, above */
|
||||
ceph_put_snap_realm(mdsc, realm);
|
||||
}
|
||||
|
||||
__cleanup_empty_realms(mdsc);
|
||||
|
||||
|
|
|
@ -690,6 +690,8 @@ struct ceph_snap_realm {
|
|||
|
||||
struct list_head empty_item; /* if i have ref==0 */
|
||||
|
||||
struct list_head dirty_item; /* if realm needs new context */
|
||||
|
||||
/* the current set of snaps for this realm */
|
||||
struct ceph_snap_context *cached_context;
|
||||
|
||||
|
|
Loading…
Reference in a new issue