NTFS: - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
	helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
	This allows us to map runlist fragments with the runlist lock already
	held without having to drop and reacquire it around the call.  Adapt
	all callers.
      - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
	runlist.  This allows us to find runlist elements with the runlist
	lock already held without having to drop and reacquire it around the
	call.  Adapt all callers.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
This commit is contained in:
Anton Altaparmakov 2005-02-15 10:08:43 +00:00
parent 1a0df15acd
commit b6ad6c52fe
6 changed files with 119 additions and 113 deletions

View file

@ -63,6 +63,15 @@ ToDo/Notes:
- Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
the creation of the unmapped runlist element for the base attribute
extent.
- Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
This allows us to map runlist fragments with the runlist lock already
held without having to drop and reacquire it around the call. Adapt
all callers.
- Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
runlist. This allows us to find runlist elements with the runlist
lock already held without having to drop and reacquire it around the
call. Adapt all callers.
2.1.22 - Many bug and race fixes and error handling improvements.

View file

@ -2,7 +2,7 @@
* aops.c - NTFS kernel address space operations and page cache handling.
* Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2004 Anton Altaparmakov
* Copyright (c) 2001-2005 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@ -135,7 +135,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
i * rec_size), rec_size);
flush_dcache_page(page);
kunmap_atomic(addr, KM_BIO_SRC_IRQ);
if (likely(!PageError(page) && page_uptodate))
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
}
unlock_page(page);
@ -347,11 +347,11 @@ static int ntfs_read_block(struct page *page)
*/
static int ntfs_readpage(struct file *file, struct page *page)
{
loff_t i_size;
ntfs_inode *ni, *base_ni;
u8 *kaddr;
ntfs_attr_search_ctx *ctx;
MFT_RECORD *mrec;
unsigned long flags;
u32 attr_len;
int err = 0;
@ -389,9 +389,9 @@ static int ntfs_readpage(struct file *file, struct page *page)
* Attribute is resident, implying it is not compressed or encrypted.
* This also means the attribute is smaller than an mft record and
* hence smaller than a page, so can simply zero out any pages with
* index above 0. We can also do this if the file size is 0.
* index above 0.
*/
if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
if (unlikely(page->index > 0)) {
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr, 0, PAGE_CACHE_SIZE);
flush_dcache_page(page);
@ -418,9 +418,10 @@ static int ntfs_readpage(struct file *file, struct page *page)
if (unlikely(err))
goto put_unm_err_out;
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
i_size = i_size_read(VFS_I(ni));
if (unlikely(attr_len > i_size))
attr_len = i_size;
read_lock_irqsave(&ni->size_lock, flags);
if (unlikely(attr_len > ni->initialized_size))
attr_len = ni->initialized_size;
read_unlock_irqrestore(&ni->size_lock, flags);
kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data to the page. */
memcpy(kaddr, (u8*)ctx->attr +
@ -1247,20 +1248,6 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
int err;
BUG_ON(!PageLocked(page));
/*
* If a previous ntfs_truncate() failed, repeat it and abort if it
* fails again.
*/
if (unlikely(NInoTruncateFailed(ni))) {
down_write(&vi->i_alloc_sem);
err = ntfs_truncate(vi);
up_write(&vi->i_alloc_sem);
if (err || NInoTruncateFailed(ni)) {
if (!err)
err = -EIO;
goto err_out;
}
}
i_size = i_size_read(vi);
/* Is the page fully outside i_size? (truncate in progress) */
if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
@ -1496,7 +1483,6 @@ static int ntfs_prepare_nonresident_write(struct page *page,
* blocksize.
*/
ablock = ni->allocated_size >> blocksize_bits;
i_size = i_size_read(vi);
initialized_size = ni->initialized_size;
read_unlock_irqrestore(&ni->size_lock, flags);

View file

@ -1,7 +1,7 @@
/**
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2004 Anton Altaparmakov
* Copyright (c) 2001-2005 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@ -30,7 +30,7 @@
#include "types.h"
/**
* ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
* ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
* @ni: ntfs inode for which to map (part of) a runlist
* @vcn: map runlist part containing this vcn
*
@ -38,24 +38,23 @@
*
* Return 0 on success and -errno on error.
*
* Locking: - The runlist must be unlocked on entry and is unlocked on return.
* - This function takes the lock for writing and modifies the runlist.
* Locking: - The runlist must be locked for writing.
* - This function modifies the runlist.
*/
int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
{
ntfs_inode *base_ni;
ntfs_attr_search_ctx *ctx;
MFT_RECORD *mrec;
ntfs_attr_search_ctx *ctx;
runlist_element *rl;
int err = 0;
ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
(unsigned long long)vcn);
if (!NInoAttr(ni))
base_ni = ni;
else
base_ni = ni->ext.base_ntfs_ino;
mrec = map_mft_record(base_ni);
if (IS_ERR(mrec))
return PTR_ERR(mrec);
@ -66,15 +65,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
}
err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
CASE_SENSITIVE, vcn, NULL, 0, ctx);
if (unlikely(err))
goto put_err_out;
down_write(&ni->runlist.lock);
/* Make sure someone else didn't do the work while we were sleeping. */
if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
LCN_RL_NOT_MAPPED)) {
runlist_element *rl;
if (likely(!err)) {
rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr,
ni->runlist.rl);
if (IS_ERR(rl))
@ -82,9 +73,6 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
else
ni->runlist.rl = rl;
}
up_write(&ni->runlist.lock);
put_err_out:
ntfs_attr_put_search_ctx(ctx);
err_out:
unmap_mft_record(base_ni);
@ -92,17 +80,45 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
}
/**
* ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode
* ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
* @ni: ntfs inode for which to map (part of) a runlist
* @vcn: map runlist part containing this vcn
*
* Map the part of a runlist containing the @vcn of the ntfs inode @ni.
*
* Return 0 on success and -errno on error.
*
* Locking: - The runlist must be unlocked on entry and is unlocked on return.
* - This function takes the runlist lock for writing and modifies the
* runlist.
*/
int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
{
int err = 0;
down_write(&ni->runlist.lock);
/* Make sure someone else didn't do the work while we were sleeping. */
if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
LCN_RL_NOT_MAPPED))
err = ntfs_map_runlist_nolock(ni, vcn);
up_write(&ni->runlist.lock);
return err;
}
/**
* ntfs_find_vcn_nolock - find a vcn in the runlist described by an ntfs inode
* @ni: ntfs inode describing the runlist to search
* @vcn: vcn to find
* @need_write: if false, lock for reading and if true, lock for writing
* @write_locked: true if the runlist is locked for writing
*
* Find the virtual cluster number @vcn in the runlist described by the ntfs
* inode @ni and return the address of the runlist element containing the @vcn.
* The runlist is left locked and the caller has to unlock it. If @need_write
* is true, the runlist is locked for writing and if @need_write is false, the
* runlist is locked for reading. In the error case, the runlist is not left
* locked.
* The runlist is left locked and the caller has to unlock it. In the error
* case, the runlist is left in the same locking state as on entry.
*
* Note if @write_locked is FALSE the lock may be dropped inside the function
* so you cannot rely on the runlist still being the same when this function
* returns.
*
* Note you need to distinguish between the lcn of the returned runlist element
* being >= 0 and LCN_HOLE. In the latter case you have to return zeroes on
@ -124,28 +140,24 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
* true, it is locked for writing. Otherwise it is locked for
* reading.
*/
runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
const BOOL need_write)
runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
const BOOL write_locked)
{
runlist_element *rl;
int err = 0;
BOOL is_retry = FALSE;
ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.",
ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
ni->mft_no, (unsigned long long)vcn,
!need_write ? "read" : "writ");
write_locked ? "write" : "read");
BUG_ON(!ni);
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
lock_retry_remap:
if (!need_write)
down_read(&ni->runlist.lock);
else
down_write(&ni->runlist.lock);
retry_remap:
rl = ni->runlist.rl;
if (likely(rl && vcn >= rl[0].vcn)) {
while (likely(rl->length)) {
if (likely(vcn < rl[1].vcn)) {
if (unlikely(vcn < rl[1].vcn)) {
if (likely(rl->lcn >= LCN_HOLE)) {
ntfs_debug("Done.");
return rl;
@ -161,19 +173,23 @@ runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
err = -EIO;
}
}
if (!need_write)
up_read(&ni->runlist.lock);
else
up_write(&ni->runlist.lock);
if (!err && !is_retry) {
/*
* The @vcn is in an unmapped region, map the runlist and
* retry.
*/
err = ntfs_map_runlist(ni, vcn);
if (!write_locked) {
up_read(&ni->runlist.lock);
down_write(&ni->runlist.lock);
}
err = ntfs_map_runlist_nolock(ni, vcn);
if (!write_locked) {
up_write(&ni->runlist.lock);
down_read(&ni->runlist.lock);
}
if (likely(!err)) {
is_retry = TRUE;
goto lock_retry_remap;
goto retry_remap;
}
/*
* -EINVAL and -ENOENT coming from a failed mapping attempt are
@ -184,6 +200,7 @@ runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
err = -EIO;
} else if (!err)
err = -EIO;
if (err != -ENOENT)
ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
return ERR_PTR(err);
}

View file

@ -60,10 +60,11 @@ typedef struct {
ATTR_RECORD *base_attr;
} ntfs_attr_search_ctx;
extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
const BOOL need_write);
extern runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
const BOOL write_locked);
int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
const u32 name_len, const IGNORE_CASE_BOOL ic,

View file

@ -849,7 +849,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
total_freed = real_freed = 0;
/* This returns with ni->runlist locked for reading on success. */
rl = ntfs_find_vcn(ni, start_vcn, FALSE);
down_read(&ni->runlist.lock);
rl = ntfs_find_vcn_nolock(ni, start_vcn, FALSE);
if (IS_ERR(rl)) {
if (!is_rollback)
ntfs_error(vol->sb, "Failed to find first runlist "
@ -863,7 +864,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
ntfs_error(vol->sb, "First runlist element has "
"invalid lcn, aborting.");
err = -EIO;
goto unl_err_out;
goto err_out;
}
/* Find the starting cluster inside the run that needs freeing. */
delta = start_vcn - rl->vcn;
@ -881,7 +882,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
if (!is_rollback)
ntfs_error(vol->sb, "Failed to clear first run "
"(error %i), aborting.", err);
goto unl_err_out;
goto err_out;
}
/* We have freed @to_free real clusters. */
real_freed = to_free;
@ -901,30 +902,15 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
if (unlikely(rl->lcn < LCN_HOLE)) {
VCN vcn;
/*
* Attempt to map runlist, dropping runlist lock for
* the duration.
*/
/* Attempt to map runlist. */
vcn = rl->vcn;
up_read(&ni->runlist.lock);
err = ntfs_map_runlist(ni, vcn);
if (err) {
if (!is_rollback)
ntfs_error(vol->sb, "Failed to map "
"runlist fragment.");
if (err == -EINVAL || err == -ENOENT)
err = -EIO;
goto err_out;
}
/*
* This returns with ni->runlist locked for reading on
* success.
*/
rl = ntfs_find_vcn(ni, vcn, FALSE);
rl = ntfs_find_vcn_nolock(ni, vcn, FALSE);
if (IS_ERR(rl)) {
err = PTR_ERR(rl);
if (!is_rollback)
ntfs_error(vol->sb, "Failed to find "
ntfs_error(vol->sb, "Failed to map "
"runlist fragment or "
"failed to find "
"subsequent runlist "
"element.");
goto err_out;
@ -937,7 +923,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
(unsigned long long)
rl->lcn);
err = -EIO;
goto unl_err_out;
goto err_out;
}
}
/* The number of clusters in this run that need freeing. */
@ -953,7 +939,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
if (!is_rollback)
ntfs_error(vol->sb, "Failed to clear "
"subsequent run.");
goto unl_err_out;
goto err_out;
}
/* We have freed @to_free real clusters. */
real_freed += to_free;
@ -974,9 +960,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
/* We are done. Return the number of actually freed clusters. */
ntfs_debug("Done.");
return real_freed;
unl_err_out:
up_read(&ni->runlist.lock);
err_out:
up_read(&ni->runlist.lock);
if (is_rollback)
return err;
/* If no real clusters were freed, no need to rollback. */

View file

@ -1,7 +1,7 @@
/**
* mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2004 Anton Altaparmakov
* Copyright (c) 2001-2005 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@ -1292,19 +1292,20 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
/*
* Determine the last lcn of the mft bitmap. The allocated size of the
* mft bitmap cannot be zero so we are ok to do this.
* ntfs_find_vcn() returns the runlist locked on success.
*/
down_write(&mftbmp_ni->runlist.lock);
read_lock_irqsave(&mftbmp_ni->size_lock, flags);
ll = mftbmp_ni->allocated_size;
read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
rl = ntfs_find_vcn(mftbmp_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
rl = ntfs_find_vcn_nolock(mftbmp_ni,
(ll - 1) >> vol->cluster_size_bits, TRUE);
if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
up_write(&mftbmp_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to determine last allocated "
"cluster of mft bitmap attribute.");
if (!IS_ERR(rl)) {
up_write(&mftbmp_ni->runlist.lock);
if (!IS_ERR(rl))
ret = -EIO;
} else
else
ret = PTR_ERR(rl);
return ret;
}
@ -1428,6 +1429,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
// TODO: Deal with this by moving this extent to a new mft
// record or by starting a new extent in a new mft record or by
// moving other attributes out of this mft record.
// Note: It will need to be a special mft record and if none of
// those are available it gets rather complicated...
ntfs_error(vol->sb, "Not enough space in this mft record to "
"accomodate extended mft bitmap attribute "
"extent. Cannot handle this yet.");
@ -1719,19 +1722,20 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
* Determine the preferred allocation location, i.e. the last lcn of
* the mft data attribute. The allocated size of the mft data
* attribute cannot be zero so we are ok to do this.
* ntfs_find_vcn() returns the runlist locked on success.
*/
down_write(&mft_ni->runlist.lock);
read_lock_irqsave(&mft_ni->size_lock, flags);
ll = mft_ni->allocated_size;
read_unlock_irqrestore(&mft_ni->size_lock, flags);
rl = ntfs_find_vcn(mft_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
rl = ntfs_find_vcn_nolock(mft_ni, (ll - 1) >> vol->cluster_size_bits,
TRUE);
if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
up_write(&mft_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to determine last allocated "
"cluster of mft data attribute.");
if (!IS_ERR(rl)) {
up_write(&mft_ni->runlist.lock);
if (!IS_ERR(rl))
ret = -EIO;
} else
else
ret = PTR_ERR(rl);
return ret;
}
@ -1858,7 +1862,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
// moving other attributes out of this mft record.
// Note: Use the special reserved mft records and ensure that
// this extent is not required to find the mft record in
// question.
// question. If no free special records left we would need to
// move an existing record away, insert ours in its place, and
// then place the moved record into the newly allocated space
// and we would then need to update all references to this mft
// record appropriately. This is rather complicated...
ntfs_error(vol->sb, "Not enough space in this mft record to "
"accomodate extended mft data attribute "
"extent. Cannot handle this yet.");