NTFS: - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking

helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist(). This allows us to map runlist fragments with the runlist lock already held without having to drop and reacquire it around the call. Adapt all callers. - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked runlist. This allows us to find runlist elements with the runlist lock already held without having to drop and reacquire it around the call. Adapt all callers. Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
2005-02-15 10:08:43 +00:00 · 2005-02-15 10:08:43 +00:00 · b6ad6c52fe
commit b6ad6c52fe
parent 1a0df15acd
6 changed files with 119 additions and 113 deletions
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@ -63,6 +63,15 @@ ToDo/Notes:
 	- Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
 	  the creation of the unmapped runlist element for the base attribute
 	  extent.
+	- Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
+	  helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
+	  This allows us to map runlist fragments with the runlist lock already
+	  held without having to drop and reacquire it around the call.  Adapt
+	  all callers.
+	- Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
+	  runlist.  This allows us to find runlist elements with the runlist
+	  lock already held without having to drop and reacquire it around the
+	  call.  Adapt all callers.

 2.1.22 - Many bug and race fixes and error handling improvements.

--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@ -2,7 +2,7 @@
 * aops.c - NTFS kernel address space operations and page cache handling.
 *	    Part of the Linux-NTFS project.
 *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
@ -135,7 +135,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 					i * rec_size), rec_size);
 		flush_dcache_page(page);
 		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
-		if (likely(!PageError(page) && page_uptodate))
+		if (likely(page_uptodate && !PageError(page)))
 			SetPageUptodate(page);
 	}
 	unlock_page(page);
@ -347,11 +347,11 @@ static int ntfs_read_block(struct page *page)
 */
 static int ntfs_readpage(struct file *file, struct page *page)
 {
-	loff_t i_size;
 	ntfs_inode *ni, *base_ni;
 	u8 *kaddr;
 	ntfs_attr_search_ctx *ctx;
 	MFT_RECORD *mrec;
+	unsigned long flags;
 	u32 attr_len;
 	int err = 0;

@ -389,9 +389,9 @@ static int ntfs_readpage(struct file *file, struct page *page)
 	 * Attribute is resident, implying it is not compressed or encrypted.
 	 * This also means the attribute is smaller than an mft record and
 	 * hence smaller than a page, so can simply zero out any pages with
-	 * index above 0.  We can also do this if the file size is 0.
+	 * index above 0.
 	 */
-	if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
+	if (unlikely(page->index > 0)) {
 		kaddr = kmap_atomic(page, KM_USER0);
 		memset(kaddr, 0, PAGE_CACHE_SIZE);
 		flush_dcache_page(page);
@ -418,9 +418,10 @@ static int ntfs_readpage(struct file *file, struct page *page)
 	if (unlikely(err))
 		goto put_unm_err_out;
 	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
-	i_size = i_size_read(VFS_I(ni));
-	if (unlikely(attr_len > i_size))
-		attr_len = i_size;
+	read_lock_irqsave(&ni->size_lock, flags);
+	if (unlikely(attr_len > ni->initialized_size))
+		attr_len = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
 	kaddr = kmap_atomic(page, KM_USER0);
 	/* Copy the data to the page. */
 	memcpy(kaddr, (u8*)ctx->attr +
@ -1247,20 +1248,6 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 	int err;

 	BUG_ON(!PageLocked(page));
-	/*
-	 * If a previous ntfs_truncate() failed, repeat it and abort if it
-	 * fails again.
-	 */
-	if (unlikely(NInoTruncateFailed(ni))) {
-		down_write(&vi->i_alloc_sem);
-		err = ntfs_truncate(vi);
-		up_write(&vi->i_alloc_sem);
-		if (err || NInoTruncateFailed(ni)) {
-			if (!err)
-				err = -EIO;
-			goto err_out;
-		}
-	}
 	i_size = i_size_read(vi);
 	/* Is the page fully outside i_size? (truncate in progress) */
 	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
@ -1496,7 +1483,6 @@ static int ntfs_prepare_nonresident_write(struct page *page,
 	 * blocksize.
 	 */
 	ablock = ni->allocated_size >> blocksize_bits;
-
 	i_size = i_size_read(vi);
 	initialized_size = ni->initialized_size;
 	read_unlock_irqrestore(&ni->size_lock, flags);
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@ -1,7 +1,7 @@
 /**
 * attrib.c - NTFS attribute operations.  Part of the Linux-NTFS project.
 *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
@ -30,7 +30,7 @@
 #include "types.h"

 /**
- * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
+ * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
 * @ni:		ntfs inode for which to map (part of) a runlist
 * @vcn:	map runlist part containing this vcn
 *
@ -38,24 +38,23 @@
 *
 * Return 0 on success and -errno on error.
 *
- * Locking: - The runlist must be unlocked on entry and is unlocked on return.
- *	    - This function takes the lock for writing and modifies the runlist.
+ * Locking: - The runlist must be locked for writing.
+ *	    - This function modifies the runlist.
 */
-int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
+int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 {
 	ntfs_inode *base_ni;
-	ntfs_attr_search_ctx *ctx;
 	MFT_RECORD *mrec;
+	ntfs_attr_search_ctx *ctx;
+	runlist_element *rl;
 	int err = 0;

 	ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
 			(unsigned long long)vcn);
-
 	if (!NInoAttr(ni))
 		base_ni = ni;
 	else
 		base_ni = ni->ext.base_ntfs_ino;
-
 	mrec = map_mft_record(base_ni);
 	if (IS_ERR(mrec))
 		return PTR_ERR(mrec);
@ -66,15 +65,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 	}
 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
 			CASE_SENSITIVE, vcn, NULL, 0, ctx);
-	if (unlikely(err))
-		goto put_err_out;
-
-	down_write(&ni->runlist.lock);
-	/* Make sure someone else didn't do the work while we were sleeping. */
-	if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
-			LCN_RL_NOT_MAPPED)) {
-		runlist_element *rl;
-
+	if (likely(!err)) {
 		rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr,
 				ni->runlist.rl);
 		if (IS_ERR(rl))
@ -82,9 +73,6 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 		else
 			ni->runlist.rl = rl;
 	}
-	up_write(&ni->runlist.lock);
-
-put_err_out:
 	ntfs_attr_put_search_ctx(ctx);
 err_out:
 	unmap_mft_record(base_ni);
@ -92,17 +80,45 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 }

 /**
- * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode
+ * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
+ * @ni:		ntfs inode for which to map (part of) a runlist
+ * @vcn:	map runlist part containing this vcn
+ *
+ * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
+ *
+ * Return 0 on success and -errno on error.
+ *
+ * Locking: - The runlist must be unlocked on entry and is unlocked on return.
+ *	    - This function takes the runlist lock for writing and modifies the
+ *	      runlist.
+ */
+int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
+{
+	int err = 0;
+
+	down_write(&ni->runlist.lock);
+	/* Make sure someone else didn't do the work while we were sleeping. */
+	if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
+			LCN_RL_NOT_MAPPED))
+		err = ntfs_map_runlist_nolock(ni, vcn);
+	up_write(&ni->runlist.lock);
+	return err;
+}
+
+/**
+ * ntfs_find_vcn_nolock - find a vcn in the runlist described by an ntfs inode
 * @ni:			ntfs inode describing the runlist to search
 * @vcn:		vcn to find
- * @need_write:	if false, lock for reading and if true, lock for writing
+ * @write_locked:	true if the runlist is locked for writing
 *
 * Find the virtual cluster number @vcn in the runlist described by the ntfs
 * inode @ni and return the address of the runlist element containing the @vcn.
- * The runlist is left locked and the caller has to unlock it.  If @need_write
- * is true, the runlist is locked for writing and if @need_write is false, the
- * runlist is locked for reading.  In the error case, the runlist is not left
- * locked.
+ * The runlist is left locked and the caller has to unlock it.  In the error
+ * case, the runlist is left in the same locking state as on entry.
+ *
+ * Note if @write_locked is FALSE the lock may be dropped inside the function
+ * so you cannot rely on the runlist still being the same when this function
+ * returns.
 *
 * Note you need to distinguish between the lcn of the returned runlist element
 * being >= 0 and LCN_HOLE.  In the later case you have to return zeroes on
@ -124,28 +140,24 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 *	      true, it is locked for writing.  Otherwise is is locked for
 *	      reading.
 */
-runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
-		const BOOL need_write)
+runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
+		const BOOL write_locked)
 {
 	runlist_element *rl;
 	int err = 0;
 	BOOL is_retry = FALSE;

-	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.",
+	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
 			ni->mft_no, (unsigned long long)vcn,
-			!need_write ? "read" : "writ");
+			write_locked ? "write" : "read");
 	BUG_ON(!ni);
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(vcn < 0);
-lock_retry_remap:
-	if (!need_write)
-		down_read(&ni->runlist.lock);
-	else
-		down_write(&ni->runlist.lock);
+retry_remap:
 	rl = ni->runlist.rl;
 	if (likely(rl && vcn >= rl[0].vcn)) {
 		while (likely(rl->length)) {
-			if (likely(vcn < rl[1].vcn)) {
+			if (unlikely(vcn < rl[1].vcn)) {
 				if (likely(rl->lcn >= LCN_HOLE)) {
 					ntfs_debug("Done.");
 					return rl;
@ -161,19 +173,23 @@ runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
 				err = -EIO;
 		}
 	}
-	if (!need_write)
-		up_read(&ni->runlist.lock);
-	else
-		up_write(&ni->runlist.lock);
 	if (!err && !is_retry) {
 		/*
 		 * The @vcn is in an unmapped region, map the runlist and
 		 * retry.
 		 */
-		err = ntfs_map_runlist(ni, vcn);
+		if (!write_locked) {
+			up_read(&ni->runlist.lock);
+			down_write(&ni->runlist.lock);
+		}
+		err = ntfs_map_runlist_nolock(ni, vcn);
+		if (!write_locked) {
+			up_write(&ni->runlist.lock);
+			down_read(&ni->runlist.lock);
+		}
 		if (likely(!err)) {
 			is_retry = TRUE;
-			goto lock_retry_remap;
+			goto retry_remap;
 		}
 		/*
 		 * -EINVAL and -ENOENT coming from a failed mapping attempt are
@ -184,6 +200,7 @@ runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
 			err = -EIO;
 	} else if (!err)
 		err = -EIO;
+	if (err != -ENOENT)
 		ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
 	return ERR_PTR(err);
 }
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@ -60,10 +60,11 @@ typedef struct {
 	ATTR_RECORD *base_attr;
 } ntfs_attr_search_ctx;

+extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
 extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);

-extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
-		const BOOL need_write);
+extern runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
+		const BOOL write_locked);

 int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 		const u32 name_len, const IGNORE_CASE_BOOL ic,
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@ -849,7 +849,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 	total_freed = real_freed = 0;

 	/* This returns with ni->runlist locked for reading on success. */
-	rl = ntfs_find_vcn(ni, start_vcn, FALSE);
+	down_read(&ni->runlist.lock);
+	rl = ntfs_find_vcn_nolock(ni, start_vcn, FALSE);
 	if (IS_ERR(rl)) {
 		if (!is_rollback)
 			ntfs_error(vol->sb, "Failed to find first runlist "
@ -863,7 +864,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 			ntfs_error(vol->sb, "First runlist element has "
 					"invalid lcn, aborting.");
 		err = -EIO;
-		goto unl_err_out;
+		goto err_out;
 	}
 	/* Find the starting cluster inside the run that needs freeing. */
 	delta = start_vcn - rl->vcn;
@ -881,7 +882,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 			if (!is_rollback)
 				ntfs_error(vol->sb, "Failed to clear first run "
 						"(error %i), aborting.", err);
-			goto unl_err_out;
+			goto err_out;
 		}
 		/* We have freed @to_free real clusters. */
 		real_freed = to_free;
@ -901,30 +902,15 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 		if (unlikely(rl->lcn < LCN_HOLE)) {
 			VCN vcn;

-			/*
-			 * Attempt to map runlist, dropping runlist lock for
-			 * the duration.
-			 */
+			/* Attempt to map runlist. */
 			vcn = rl->vcn;
-			up_read(&ni->runlist.lock);
-			err = ntfs_map_runlist(ni, vcn);
-			if (err) {
-				if (!is_rollback)
-					ntfs_error(vol->sb, "Failed to map "
-							"runlist fragment.");
-				if (err == -EINVAL || err == -ENOENT)
-					err = -EIO;
-				goto err_out;
-			}
-			/*
-			 * This returns with ni->runlist locked for reading on
-			 * success.
-			 */
-			rl = ntfs_find_vcn(ni, vcn, FALSE);
+			rl = ntfs_find_vcn_nolock(ni, vcn, FALSE);
 			if (IS_ERR(rl)) {
 				err = PTR_ERR(rl);
 				if (!is_rollback)
-					ntfs_error(vol->sb, "Failed to find "
+					ntfs_error(vol->sb, "Failed to map "
+							"runlist fragment or "
+							"failed to find "
 							"subsequent runlist "
 							"element.");
 				goto err_out;
@ -937,7 +923,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 							(unsigned long long)
 							rl->lcn);
 				err = -EIO;
-				goto unl_err_out;
+				goto err_out;
 			}
 		}
 		/* The number of clusters in this run that need freeing. */
@ -953,7 +939,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 				if (!is_rollback)
 					ntfs_error(vol->sb, "Failed to clear "
 							"subsequent run.");
-				goto unl_err_out;
+				goto err_out;
 			}
 			/* We have freed @to_free real clusters. */
 			real_freed += to_free;
@ -974,9 +960,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 	/* We are done.  Return the number of actually freed clusters. */
 	ntfs_debug("Done.");
 	return real_freed;
-unl_err_out:
-	up_read(&ni->runlist.lock);
 err_out:
+	up_read(&ni->runlist.lock);
 	if (is_rollback)
 		return err;
 	/* If no real clusters were freed, no need to rollback. */
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@ -1,7 +1,7 @@
 /**
 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
 *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
@ -1292,19 +1292,20 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	/*
 	 * Determine the last lcn of the mft bitmap.  The allocated size of the
 	 * mft bitmap cannot be zero so we are ok to do this.
-	 * ntfs_find_vcn() returns the runlist locked on success.
 	 */
+	down_write(&mftbmp_ni->runlist.lock);
 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
 	ll = mftbmp_ni->allocated_size;
 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
-	rl = ntfs_find_vcn(mftbmp_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
+	rl = ntfs_find_vcn_nolock(mftbmp_ni,
+			(ll - 1) >> vol->cluster_size_bits, TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+		up_write(&mftbmp_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
 				"cluster of mft bitmap attribute.");
-		if (!IS_ERR(rl)) {
-			up_write(&mftbmp_ni->runlist.lock);
+		if (!IS_ERR(rl))
 			ret = -EIO;
-		} else
+		else
 			ret = PTR_ERR(rl);
 		return ret;
 	}
@ -1428,6 +1429,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 		// TODO: Deal with this by moving this extent to a new mft
 		// record or by starting a new extent in a new mft record or by
 		// moving other attributes out of this mft record.
+		// Note: It will need to be a special mft record and if none of
+		// those are available it gets rather complicated...
 		ntfs_error(vol->sb, "Not enough space in this mft record to "
 				"accomodate extended mft bitmap attribute "
 				"extent.  Cannot handle this yet.");
@ -1719,19 +1722,20 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	 * Determine the preferred allocation location, i.e. the last lcn of
 	 * the mft data attribute.  The allocated size of the mft data
 	 * attribute cannot be zero so we are ok to do this.
-	 * ntfs_find_vcn() returns the runlist locked on success.
 	 */
+	down_write(&mft_ni->runlist.lock);
 	read_lock_irqsave(&mft_ni->size_lock, flags);
 	ll = mft_ni->allocated_size;
 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
-	rl = ntfs_find_vcn(mft_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
+	rl = ntfs_find_vcn_nolock(mft_ni, (ll - 1) >> vol->cluster_size_bits,
+			TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+		up_write(&mft_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
 				"cluster of mft data attribute.");
-		if (!IS_ERR(rl)) {
-			up_write(&mft_ni->runlist.lock);
+		if (!IS_ERR(rl))
 			ret = -EIO;
-		} else
+		else
 			ret = PTR_ERR(rl);
 		return ret;
 	}
@ -1858,7 +1862,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 		// moving other attributes out of this mft record.
 		// Note: Use the special reserved mft records and ensure that
 		// this extent is not required to find the mft record in
-		// question.
+		// question.  If no free special records left we would need to
+		// move an existing record away, insert ours in its place, and
+		// then place the moved record into the newly allocated space
+		// and we would then need to update all references to this mft
+		// record appropriately.  This is rather complicated...
 		ntfs_error(vol->sb, "Not enough space in this mft record to "
 				"accomodate extended mft data attribute "
 				"extent.  Cannot handle this yet.");