Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (138 commits)
  ocfs2: Access the right buffer_head in ocfs2_merge_rec_left.
  ocfs2: use min_t in ocfs2_quota_read()
  ocfs2: remove unneeded lvb casts
  ocfs2: Add xattr support checking in init_security
  ocfs2: alloc xattr bucket in ocfs2_xattr_set_handle
  ocfs2: calculate and reserve credits for xattr value in mknod
  ocfs2/xattr: fix credits calculation during index create
  ocfs2/xattr: Always updating ctime during xattr set.
  ocfs2/xattr: Remove extend_trans call and add its credits from the beginning
  ocfs2/dlm: Fix race during lockres mastery
  ocfs2/dlm: Fix race in adding/removing lockres' to/from the tracking list
  ocfs2/dlm: Hold off sending lockres drop ref message while lockres is migrating
  ocfs2/dlm: Clean up errors in dlm_proxy_ast_handler()
  ocfs2/dlm: Fix a race between migrate request and exit domain
  ocfs2: One more hamming code optimization.
  ocfs2: Another hamming code optimization.
  ocfs2: Don't hand-code xor in ocfs2_hamming_encode().
  ocfs2: Enable metadata checksums.
  ocfs2: Validate superblock with checksum and ecc.
  ocfs2: Checksum and ECC for directory blocks.
  ...
This commit is contained in:
Linus Torvalds 2009-01-05 18:32:43 -08:00
commit 10cc04f5a0
72 changed files with 9793 additions and 3190 deletions

View file

@ -31,7 +31,6 @@ Features which OCFS2 does not support yet:
- quotas
- Directory change notification (F_NOTIFY)
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
- POSIX ACLs
Mount options
=============
@ -79,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at
bits of significance.
user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
acl Enables POSIX Access Control Lists support.
noacl (*) Disables POSIX Access Control Lists support.

View file

@ -189,6 +189,8 @@ config OCFS2_FS
select CONFIGFS_FS
select JBD2
select CRC32
select QUOTA
select QUOTA_TREE
help
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
@ -258,15 +260,14 @@ config OCFS2_DEBUG_FS
this option for debugging only as it is likely to decrease
performance of the filesystem.
config OCFS2_COMPAT_JBD
bool "Use JBD for compatibility"
config OCFS2_FS_POSIX_ACL
bool "OCFS2 POSIX Access Control Lists"
depends on OCFS2_FS
select FS_POSIX_ACL
default n
select JBD
help
The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
is backwards compatible with JBD. It is safe to say N here.
However, if you really want to use the original JBD, say Y here.
Posix Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
endif # BLOCK
@ -303,6 +304,10 @@ config PRINT_QUOTA_WARNING
Note that this behavior is currently deprecated and may go away in
future. Please use notification via netlink socket instead.
# Generic support for tree structured quota files. Seleted when needed.
config QUOTA_TREE
tristate
config QFMT_V1
tristate "Old quota format support"
depends on QUOTA
@ -314,6 +319,7 @@ config QFMT_V1
config QFMT_V2
tristate "Quota format v2 support"
depends on QUOTA
select QUOTA_TREE
help
This quota format allows using quotas with 32-bit UIDs/GIDs. If you
need this functionality say Y here.

View file

@ -54,6 +54,7 @@ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_QUOTA) += dquot.o
obj-$(CONFIG_QFMT_V1) += quota_v1.o
obj-$(CONFIG_QFMT_V2) += quota_v2.o
obj-$(CONFIG_QUOTA_TREE) += quota_tree.o
obj-$(CONFIG_QUOTACTL) += quota.o
obj-$(CONFIG_PROC_FS) += proc/

View file

@ -211,8 +211,6 @@ static struct hlist_head *dquot_hash;
struct dqstats dqstats;
static void dqput(struct dquot *dquot);
static inline unsigned int
hashfn(const struct super_block *sb, unsigned int id, int type)
{
@ -415,6 +413,17 @@ int dquot_release(struct dquot *dquot)
return ret;
}
void dquot_destroy(struct dquot *dquot)
{
kmem_cache_free(dquot_cachep, dquot);
}
EXPORT_SYMBOL(dquot_destroy);
static inline void do_destroy_dquot(struct dquot *dquot)
{
dquot->dq_sb->dq_op->destroy_dquot(dquot);
}
/* Invalidate all dquots on the list. Note that this function is called after
* quota is disabled and pointers from inodes removed so there cannot be new
* quota users. There can still be some users of quotas due to inodes being
@ -463,11 +472,46 @@ static void invalidate_dquots(struct super_block *sb, int type)
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
kmem_cache_free(dquot_cachep, dquot);
do_destroy_dquot(dquot);
}
spin_unlock(&dq_list_lock);
}
/* Call callback for every active dquot on given filesystem */
int dquot_scan_active(struct super_block *sb,
int (*fn)(struct dquot *dquot, unsigned long priv),
unsigned long priv)
{
struct dquot *dquot, *old_dquot = NULL;
int ret = 0;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
spin_lock(&dq_list_lock);
list_for_each_entry(dquot, &inuse_list, dq_inuse) {
if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
continue;
if (dquot->dq_sb != sb)
continue;
/* Now we have active dquot so we can just increase use count */
atomic_inc(&dquot->dq_count);
dqstats.lookups++;
spin_unlock(&dq_list_lock);
dqput(old_dquot);
old_dquot = dquot;
ret = fn(dquot, priv);
if (ret < 0)
goto out;
spin_lock(&dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */
}
spin_unlock(&dq_list_lock);
out:
dqput(old_dquot);
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return ret;
}
int vfs_quota_sync(struct super_block *sb, int type)
{
struct list_head *dirty;
@ -479,7 +523,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
if (!sb_has_quota_enabled(sb, cnt))
if (!sb_has_quota_active(sb, cnt))
continue;
spin_lock(&dq_list_lock);
dirty = &dqopt->info[cnt].dqi_dirty_list;
@ -504,8 +548,8 @@ int vfs_quota_sync(struct super_block *sb, int type)
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)
&& info_dirty(&dqopt->info[cnt]))
if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
&& info_dirty(&dqopt->info[cnt]))
sb->dq_op->write_info(sb, cnt);
spin_lock(&dq_list_lock);
dqstats.syncs++;
@ -527,7 +571,7 @@ static void prune_dqcache(int count)
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
kmem_cache_free(dquot_cachep, dquot);
do_destroy_dquot(dquot);
count--;
head = free_dquots.prev;
}
@ -558,7 +602,7 @@ static struct shrinker dqcache_shrinker = {
* NOTE: If you change this function please check whether dqput_blocks() works right...
* MUST be called with either dqptr_sem or dqonoff_mutex held
*/
static void dqput(struct dquot *dquot)
void dqput(struct dquot *dquot)
{
int ret;
@ -584,7 +628,7 @@ static void dqput(struct dquot *dquot)
/* We have more than one user... nothing to do */
atomic_dec(&dquot->dq_count);
/* Releasing dquot during quotaoff phase? */
if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) &&
if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) &&
atomic_read(&dquot->dq_count) == 1)
wake_up(&dquot->dq_wait_unused);
spin_unlock(&dq_list_lock);
@ -625,11 +669,17 @@ static void dqput(struct dquot *dquot)
spin_unlock(&dq_list_lock);
}
struct dquot *dquot_alloc(struct super_block *sb, int type)
{
return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
}
EXPORT_SYMBOL(dquot_alloc);
static struct dquot *get_empty_dquot(struct super_block *sb, int type)
{
struct dquot *dquot;
dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
dquot = sb->dq_op->alloc_dquot(sb, type);
if(!dquot)
return NODQUOT;
@ -646,16 +696,34 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
return dquot;
}
/*
* Check whether dquot is in memory.
* MUST be called with either dqptr_sem or dqonoff_mutex held
*/
int dquot_is_cached(struct super_block *sb, unsigned int id, int type)
{
unsigned int hashent = hashfn(sb, id, type);
int ret = 0;
if (!sb_has_quota_active(sb, type))
return 0;
spin_lock(&dq_list_lock);
if (find_dquot(hashent, sb, id, type) != NODQUOT)
ret = 1;
spin_unlock(&dq_list_lock);
return ret;
}
/*
* Get reference to dquot
* MUST be called with either dqptr_sem or dqonoff_mutex held
*/
static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
{
unsigned int hashent = hashfn(sb, id, type);
struct dquot *dquot, *empty = NODQUOT;
if (!sb_has_quota_enabled(sb, type))
if (!sb_has_quota_active(sb, type))
return NODQUOT;
we_slept:
spin_lock(&dq_list_lock);
@ -682,7 +750,7 @@ static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
dqstats.lookups++;
spin_unlock(&dq_list_lock);
if (empty)
kmem_cache_free(dquot_cachep, empty);
do_destroy_dquot(empty);
}
/* Wait for dq_lock - after this we know that either dquot_release() is already
* finished or it will be canceled due to dq_count > 1 test */
@ -820,7 +888,7 @@ static void drop_dquot_ref(struct super_block *sb, int type)
}
}
static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
{
dquot->dq_dqb.dqb_curinodes += number;
}
@ -830,9 +898,10 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
dquot->dq_dqb.dqb_curspace += number;
}
static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
{
if (dquot->dq_dqb.dqb_curinodes > number)
if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
dquot->dq_dqb.dqb_curinodes >= number)
dquot->dq_dqb.dqb_curinodes -= number;
else
dquot->dq_dqb.dqb_curinodes = 0;
@ -843,11 +912,12 @@ static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
static inline void dquot_decr_space(struct dquot *dquot, qsize_t number)
{
if (dquot->dq_dqb.dqb_curspace > number)
if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
dquot->dq_dqb.dqb_curspace >= number)
dquot->dq_dqb.dqb_curspace -= number;
else
dquot->dq_dqb.dqb_curspace = 0;
if (toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
dquot->dq_dqb.dqb_btime = (time_t) 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
@ -1023,10 +1093,11 @@ static inline char ignore_hardlimit(struct dquot *dquot)
}
/* needs dq_data_lock */
static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
{
*warntype = QUOTA_NL_NOWARN;
if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
return QUOTA_OK;
if (dquot->dq_dqb.dqb_ihardlimit &&
@ -1058,11 +1129,12 @@ static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
{
*warntype = QUOTA_NL_NOWARN;
if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
return QUOTA_OK;
if (dquot->dq_dqb.dqb_bhardlimit &&
toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit &&
dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
*warntype = QUOTA_NL_BHARDWARN;
@ -1070,7 +1142,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
@ -1079,7 +1151,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime == 0) {
if (!prealloc) {
*warntype = QUOTA_NL_BSOFTWARN;
@ -1096,10 +1168,11 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
return QUOTA_OK;
}
static int info_idq_free(struct dquot *dquot, ulong inodes)
static int info_idq_free(struct dquot *dquot, qsize_t inodes)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type))
return QUOTA_NL_NOWARN;
if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
@ -1113,15 +1186,13 @@ static int info_idq_free(struct dquot *dquot, ulong inodes)
static int info_bdq_free(struct dquot *dquot, qsize_t space)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
return QUOTA_NL_NOWARN;
if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
dquot->dq_dqb.dqb_bsoftlimit)
if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
return QUOTA_NL_BSOFTBELOW;
if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
toqb(dquot->dq_dqb.dqb_curspace - space) <
dquot->dq_dqb.dqb_bhardlimit)
if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit &&
dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit)
return QUOTA_NL_BHARDBELOW;
return QUOTA_NL_NOWARN;
}
@ -1166,17 +1237,23 @@ int dquot_initialize(struct inode *inode, int type)
* Release all quotas referenced by inode
* Transaction must be started at an entry
*/
int dquot_drop(struct inode *inode)
int dquot_drop_locked(struct inode *inode)
{
int cnt;
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] != NODQUOT) {
dqput(inode->i_dquot[cnt]);
inode->i_dquot[cnt] = NODQUOT;
}
}
return 0;
}
int dquot_drop(struct inode *inode)
{
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
dquot_drop_locked(inode);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
}
@ -1264,7 +1341,7 @@ int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
/*
* This operation can block, but only after everything is updated
*/
int dquot_alloc_inode(const struct inode *inode, unsigned long number)
int dquot_alloc_inode(const struct inode *inode, qsize_t number)
{
int cnt, ret = NO_QUOTA;
char warntype[MAXQUOTAS];
@ -1349,7 +1426,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
/*
* This operation can block, but only after everything is updated
*/
int dquot_free_inode(const struct inode *inode, unsigned long number)
int dquot_free_inode(const struct inode *inode, qsize_t number)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
@ -1495,7 +1572,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
/* Wrapper for transferring ownership of an inode */
int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
{
if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
vfs_dq_init(inode);
if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
return 1;
@ -1533,54 +1610,27 @@ struct dquot_operations dquot_operations = {
.acquire_dquot = dquot_acquire,
.release_dquot = dquot_release,
.mark_dirty = dquot_mark_dquot_dirty,
.write_info = dquot_commit_info
.write_info = dquot_commit_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
static inline void set_enable_flags(struct quota_info *dqopt, int type)
{
switch (type) {
case USRQUOTA:
dqopt->flags |= DQUOT_USR_ENABLED;
dqopt->flags &= ~DQUOT_USR_SUSPENDED;
break;
case GRPQUOTA:
dqopt->flags |= DQUOT_GRP_ENABLED;
dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
break;
}
}
static inline void reset_enable_flags(struct quota_info *dqopt, int type,
int remount)
{
switch (type) {
case USRQUOTA:
dqopt->flags &= ~DQUOT_USR_ENABLED;
if (remount)
dqopt->flags |= DQUOT_USR_SUSPENDED;
else
dqopt->flags &= ~DQUOT_USR_SUSPENDED;
break;
case GRPQUOTA:
dqopt->flags &= ~DQUOT_GRP_ENABLED;
if (remount)
dqopt->flags |= DQUOT_GRP_SUSPENDED;
else
dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
break;
}
}
/*
* Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
*/
int vfs_quota_off(struct super_block *sb, int type, int remount)
int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
{
int cnt, ret = 0;
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *toputinode[MAXQUOTAS];
/* Cannot turn off usage accounting without turning off limits, or
* suspend quotas and simultaneously turn quotas off. */
if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED))
|| (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED |
DQUOT_USAGE_ENABLED)))
return -EINVAL;
/* We need to serialize quota_off() for device */
mutex_lock(&dqopt->dqonoff_mutex);
@ -1589,7 +1639,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
* sometimes we are called when fill_super() failed and calling
* sync_fs() in such cases does no good.
*/
if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) {
if (!sb_any_quota_loaded(sb)) {
mutex_unlock(&dqopt->dqonoff_mutex);
return 0;
}
@ -1597,17 +1647,28 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
toputinode[cnt] = NULL;
if (type != -1 && cnt != type)
continue;
/* If we keep inodes of quota files after remount and quotaoff
* is called, drop kept inodes. */
if (!remount && sb_has_quota_suspended(sb, cnt)) {
iput(dqopt->files[cnt]);
dqopt->files[cnt] = NULL;
reset_enable_flags(dqopt, cnt, 0);
if (!sb_has_quota_loaded(sb, cnt))
continue;
if (flags & DQUOT_SUSPENDED) {
dqopt->flags |=
dquot_state_flag(DQUOT_SUSPENDED, cnt);
} else {
dqopt->flags &= ~dquot_state_flag(flags, cnt);
/* Turning off suspended quotas? */
if (!sb_has_quota_loaded(sb, cnt) &&
sb_has_quota_suspended(sb, cnt)) {
dqopt->flags &= ~dquot_state_flag(
DQUOT_SUSPENDED, cnt);
iput(dqopt->files[cnt]);
dqopt->files[cnt] = NULL;
continue;
}
}
if (!sb_has_quota_enabled(sb, cnt))
/* We still have to keep quota loaded? */
if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED))
continue;
reset_enable_flags(dqopt, cnt, remount);
/* Note: these are blocking operations */
drop_dquot_ref(sb, cnt);
@ -1623,7 +1684,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
put_quota_format(dqopt->info[cnt].dqi_format);
toputinode[cnt] = dqopt->files[cnt];
if (!remount)
if (!sb_has_quota_loaded(sb, cnt))
dqopt->files[cnt] = NULL;
dqopt->info[cnt].dqi_flags = 0;
dqopt->info[cnt].dqi_igrace = 0;
@ -1631,6 +1692,11 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
dqopt->ops[cnt] = NULL;
}
mutex_unlock(&dqopt->dqonoff_mutex);
/* Skip syncing and setting flags if quota files are hidden */
if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
goto put_inodes;
/* Sync the superblock so that buffers with quota data are written to
* disk (and so userspace sees correct data afterwards). */
if (sb->s_op->sync_fs)
@ -1646,7 +1712,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
mutex_lock(&dqopt->dqonoff_mutex);
/* If quota was reenabled in the meantime, we have
* nothing to do */
if (!sb_has_quota_enabled(sb, cnt)) {
if (!sb_has_quota_loaded(sb, cnt)) {
mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA);
toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
S_NOATIME | S_NOQUOTA);
@ -1655,26 +1721,43 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
mark_inode_dirty(toputinode[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
}
if (sb->s_bdev)
invalidate_bdev(sb->s_bdev);
put_inodes:
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (toputinode[cnt]) {
/* On remount RO, we keep the inode pointer so that we
* can reenable quota on the subsequent remount RW.
* But we have better not keep inode pointer when there
* is pending delete on the quota file... */
if (!remount)
* can reenable quota on the subsequent remount RW. We
* have to check 'flags' variable and not use sb_has_
* function because another quotaon / quotaoff could
* change global state before we got here. We refuse
* to suspend quotas when there is pending delete on
* the quota file... */
if (!(flags & DQUOT_SUSPENDED))
iput(toputinode[cnt]);
else if (!toputinode[cnt]->i_nlink)
ret = -EBUSY;
}
if (sb->s_bdev)
invalidate_bdev(sb->s_bdev);
return ret;
}
int vfs_quota_off(struct super_block *sb, int type, int remount)
{
return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
}
/*
* Turn quotas on on a device
*/
/* Helper function when we already have the inode */
static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
/*
* Helper function to turn quotas on when we already have the inode of
* quota file and no quota information is loaded.
*/
static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
unsigned int flags)
{
struct quota_format_type *fmt = find_quota_format(format_id);
struct super_block *sb = inode->i_sb;
@ -1696,27 +1779,37 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
error = -EINVAL;
goto out_fmt;
}
/* Usage always has to be set... */
if (!(flags & DQUOT_USAGE_ENABLED)) {
error = -EINVAL;
goto out_fmt;
}
/* As we bypass the pagecache we must now flush the inode so that
* we see all the changes from userspace... */
write_inode_now(inode, 1);
/* And now flush the block cache so that kernel sees the changes */
invalidate_bdev(sb->s_bdev);
if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
/* As we bypass the pagecache we must now flush the inode so
* that we see all the changes from userspace... */
write_inode_now(inode, 1);
/* And now flush the block cache so that kernel sees the
* changes */
invalidate_bdev(sb->s_bdev);
}
mutex_lock(&inode->i_mutex);
mutex_lock(&dqopt->dqonoff_mutex);
if (sb_has_quota_enabled(sb, type) ||
sb_has_quota_suspended(sb, type)) {
if (sb_has_quota_loaded(sb, type)) {
error = -EBUSY;
goto out_lock;
}
/* We don't want quota and atime on quota files (deadlocks possible)
* Also nobody should write to the file - we use special IO operations
* which ignore the immutable bit. */
down_write(&dqopt->dqptr_sem);
oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
up_write(&dqopt->dqptr_sem);
sb->dq_op->drop(inode);
if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
/* We don't want quota and atime on quota files (deadlocks
* possible) Also nobody should write to the file - we use
* special IO operations which ignore the immutable bit. */
down_write(&dqopt->dqptr_sem);
oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
up_write(&dqopt->dqptr_sem);
sb->dq_op->drop(inode);
}
error = -EIO;
dqopt->files[type] = igrab(inode);
@ -1737,7 +1830,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
}
mutex_unlock(&dqopt->dqio_mutex);
mutex_unlock(&inode->i_mutex);
set_enable_flags(dqopt, type);
dqopt->flags |= dquot_state_flag(flags, type);
add_dquot_ref(sb, type);
mutex_unlock(&dqopt->dqonoff_mutex);
@ -1770,20 +1863,23 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *inode;
int ret;
unsigned int flags;
mutex_lock(&dqopt->dqonoff_mutex);
if (!sb_has_quota_suspended(sb, type)) {
mutex_unlock(&dqopt->dqonoff_mutex);
return 0;
}
BUG_ON(sb_has_quota_enabled(sb, type));
inode = dqopt->files[type];
dqopt->files[type] = NULL;
reset_enable_flags(dqopt, type, 0);
flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
DQUOT_LIMITS_ENABLED, type);
dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
mutex_unlock(&dqopt->dqonoff_mutex);
ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id);
flags = dquot_generic_flag(flags, type);
ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id,
flags);
iput(inode);
return ret;
@ -1799,12 +1895,12 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
if (path->mnt->mnt_sb != sb)
error = -EXDEV;
else
error = vfs_quota_on_inode(path->dentry->d_inode, type,
format_id);
error = vfs_load_quota_inode(path->dentry->d_inode, type,
format_id, DQUOT_USAGE_ENABLED |
DQUOT_LIMITS_ENABLED);
return error;
}
/* Actual function called from quotactl() */
int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
int remount)
{
@ -1822,6 +1918,50 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
return error;
}
/*
* More powerful function for turning on quotas allowing setting
* of individual quota flags
*/
int vfs_quota_enable(struct inode *inode, int type, int format_id,
unsigned int flags)
{
int ret = 0;
struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
/* Just unsuspend quotas? */
if (flags & DQUOT_SUSPENDED)
return vfs_quota_on_remount(sb, type);
if (!flags)
return 0;
/* Just updating flags needed? */
if (sb_has_quota_loaded(sb, type)) {
mutex_lock(&dqopt->dqonoff_mutex);
/* Now do a reliable test... */
if (!sb_has_quota_loaded(sb, type)) {
mutex_unlock(&dqopt->dqonoff_mutex);
goto load_quota;
}
if (flags & DQUOT_USAGE_ENABLED &&
sb_has_quota_usage_enabled(sb, type)) {
ret = -EBUSY;
goto out_lock;
}
if (flags & DQUOT_LIMITS_ENABLED &&
sb_has_quota_limits_enabled(sb, type)) {
ret = -EBUSY;
goto out_lock;
}
sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
out_lock:
mutex_unlock(&dqopt->dqonoff_mutex);
return ret;
}
load_quota:
return vfs_load_quota_inode(inode, type, format_id, flags);
}
/*
* This function is used when filesystem needs to initialize quotas
* during mount time.
@ -1843,7 +1983,8 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
error = security_quota_on(dentry);
if (!error)
error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
error = vfs_load_quota_inode(dentry->d_inode, type, format_id,
DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
out:
dput(dentry);
@ -1866,14 +2007,24 @@ int vfs_dq_quota_on_remount(struct super_block *sb)
return ret;
}
static inline qsize_t qbtos(qsize_t blocks)
{
return blocks << QIF_DQBLKSIZE_BITS;
}
static inline qsize_t stoqb(qsize_t space)
{
return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS;
}
/* Generic routine for getting common part of quota structure */
static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
{
struct mem_dqblk *dm = &dquot->dq_dqb;
spin_lock(&dq_data_lock);
di->dqb_bhardlimit = dm->dqb_bhardlimit;
di->dqb_bsoftlimit = dm->dqb_bsoftlimit;
di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
di->dqb_curspace = dm->dqb_curspace;
di->dqb_ihardlimit = dm->dqb_ihardlimit;
di->dqb_isoftlimit = dm->dqb_isoftlimit;
@ -1918,28 +2069,36 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
if (di->dqb_valid & QIF_SPACE) {
dm->dqb_curspace = di->dqb_curspace;
check_blim = 1;
__set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_BLIMITS) {
dm->dqb_bsoftlimit = di->dqb_bsoftlimit;
dm->dqb_bhardlimit = di->dqb_bhardlimit;
dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
check_blim = 1;
__set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_INODES) {
dm->dqb_curinodes = di->dqb_curinodes;
check_ilim = 1;
__set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_ILIMITS) {
dm->dqb_isoftlimit = di->dqb_isoftlimit;
dm->dqb_ihardlimit = di->dqb_ihardlimit;
check_ilim = 1;
__set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_BTIME)
if (di->dqb_valid & QIF_BTIME) {
dm->dqb_btime = di->dqb_btime;
if (di->dqb_valid & QIF_ITIME)
__set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_ITIME) {
dm->dqb_itime = di->dqb_itime;
__set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
}
if (check_blim) {
if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) {
if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) {
dm->dqb_btime = 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
@ -1970,12 +2129,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
int rc;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
if (!(dquot = dqget(sb, id, type))) {
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return -ESRCH;
dquot = dqget(sb, id, type);
if (!dquot) {
rc = -ESRCH;
goto out;
}
rc = do_set_dqblk(dquot, di);
dqput(dquot);
out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return rc;
}
@ -1986,7 +2147,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
struct mem_dqinfo *mi;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
if (!sb_has_quota_enabled(sb, type)) {
if (!sb_has_quota_active(sb, type)) {
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return -ESRCH;
}
@ -2005,11 +2166,12 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
{
struct mem_dqinfo *mi;
int err = 0;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
if (!sb_has_quota_enabled(sb, type)) {
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return -ESRCH;
if (!sb_has_quota_active(sb, type)) {
err = -ESRCH;
goto out;
}
mi = sb_dqopt(sb)->info + type;
spin_lock(&dq_data_lock);
@ -2023,8 +2185,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
mark_info_dirty(sb, type);
/* Force write to disk */
sb->dq_op->write_info(sb, type);
out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return 0;
return err;
}
struct quotactl_ops vfs_quotactl_ops = {
@ -2186,10 +2349,13 @@ EXPORT_SYMBOL(register_quota_format);
EXPORT_SYMBOL(unregister_quota_format);
EXPORT_SYMBOL(dqstats);
EXPORT_SYMBOL(dq_data_lock);
EXPORT_SYMBOL(vfs_quota_enable);
EXPORT_SYMBOL(vfs_quota_on);
EXPORT_SYMBOL(vfs_quota_on_path);
EXPORT_SYMBOL(vfs_quota_on_mount);
EXPORT_SYMBOL(vfs_quota_disable);
EXPORT_SYMBOL(vfs_quota_off);
EXPORT_SYMBOL(dquot_scan_active);
EXPORT_SYMBOL(vfs_quota_sync);
EXPORT_SYMBOL(vfs_get_dqinfo);
EXPORT_SYMBOL(vfs_set_dqinfo);
@ -2202,7 +2368,11 @@ EXPORT_SYMBOL(dquot_release);
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
EXPORT_SYMBOL(dquot_initialize);
EXPORT_SYMBOL(dquot_drop);
EXPORT_SYMBOL(dquot_drop_locked);
EXPORT_SYMBOL(vfs_dq_drop);
EXPORT_SYMBOL(dqget);
EXPORT_SYMBOL(dqput);
EXPORT_SYMBOL(dquot_is_cached);
EXPORT_SYMBOL(dquot_alloc_space);
EXPORT_SYMBOL(dquot_alloc_inode);
EXPORT_SYMBOL(dquot_free_space);

View file

@ -713,7 +713,9 @@ static struct dquot_operations ext3_quota_operations = {
.acquire_dquot = ext3_acquire_dquot,
.release_dquot = ext3_release_dquot,
.mark_dirty = ext3_mark_dquot_dirty,
.write_info = ext3_write_info
.write_info = ext3_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext3_qctl_operations = {
@ -1035,8 +1037,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT3-fs: Cannot change journaled "
@ -1075,8 +1076,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
@ -1095,8 +1095,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
@ -1115,8 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) {
if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT3-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;

View file

@ -803,7 +803,9 @@ static struct dquot_operations ext4_quota_operations = {
.acquire_dquot = ext4_acquire_dquot,
.release_dquot = ext4_release_dquot,
.mark_dirty = ext4_mark_dquot_dirty,
.write_info = ext4_write_info
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext4_qctl_operations = {
@ -1142,8 +1144,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT4-fs: Cannot change journaled "
@ -1182,8 +1183,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
@ -1202,8 +1202,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
@ -1222,7 +1221,7 @@ static int parse_options(char *options, struct super_block *sb,
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_enabled(sb)) {
if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT4-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;

View file

@ -509,6 +509,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (is_journal_aborted(journal)) {
clear_buffer_jbddirty(jh2bh(jh));
JBUFFER_TRACE(jh, "journal is aborting: refile");
jbd2_buffer_abort_trigger(jh,
jh->b_frozen_data ?
jh->b_frozen_triggers :
jh->b_triggers);
jbd2_journal_refile_buffer(journal, jh);
/* If that was the last one, we need to clean up
* any descriptor buffers which may have been
@ -844,6 +848,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* data.
*
* Otherwise, we can just throw away the frozen data now.
*
* We also know that the frozen data has already fired
* its triggers if they exist, so we can clear that too.
*/
if (jh->b_committed_data) {
jbd2_free(jh->b_committed_data, bh->b_size);
@ -851,10 +858,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (jh->b_frozen_data) {
jh->b_committed_data = jh->b_frozen_data;
jh->b_frozen_data = NULL;
jh->b_frozen_triggers = NULL;
}
} else if (jh->b_frozen_data) {
jbd2_free(jh->b_frozen_data, bh->b_size);
jh->b_frozen_data = NULL;
jh->b_frozen_triggers = NULL;
}
spin_lock(&journal->j_list_lock);

View file

@ -50,6 +50,7 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
EXPORT_SYMBOL(jbd2_journal_get_write_access);
EXPORT_SYMBOL(jbd2_journal_get_create_access);
EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_release_buffer);
EXPORT_SYMBOL(jbd2_journal_forget);
@ -290,6 +291,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
struct jbd2_buffer_trigger_type *triggers;
/*
* The buffer really shouldn't be locked: only the current committing
@ -314,12 +316,22 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
new_offset = offset_in_page(jh_in->b_frozen_data);
triggers = jh_in->b_frozen_triggers;
} else {
new_page = jh2bh(jh_in)->b_page;
new_offset = offset_in_page(jh2bh(jh_in)->b_data);
triggers = jh_in->b_triggers;
}
mapped_data = kmap_atomic(new_page, KM_USER0);
/*
* Fire any commit trigger. Do this before checking for escaping,
* as the trigger may modify the magic offset. If a copy-out
* happens afterwards, it will have the correct data in the buffer.
*/
jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
triggers);
/*
* Check for escaping
*/
@ -352,6 +364,13 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
new_page = virt_to_page(tmp);
new_offset = offset_in_page(tmp);
done_copy_out = 1;
/*
* This isn't strictly necessary, as we're using frozen
* data for the escaping, but it keeps consistency with
* b_frozen_data usage.
*/
jh_in->b_frozen_triggers = jh_in->b_triggers;
}
/*

View file

@ -741,6 +741,12 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
source = kmap_atomic(page, KM_USER0);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
/*
* Now that the frozen data is saved off, we need to store
* any matching triggers.
*/
jh->b_frozen_triggers = jh->b_triggers;
}
jbd_unlock_bh_state(bh);
@ -943,6 +949,47 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
return err;
}
/**
* void jbd2_journal_set_triggers() - Add triggers for commit writeout
* @bh: buffer to trigger on
* @type: struct jbd2_buffer_trigger_type containing the trigger(s).
*
* Set any triggers on this journal_head. This is always safe, because
* triggers for a committing buffer will be saved off, and triggers for
* a running transaction will match the buffer in that transaction.
*
* Call with NULL to clear the triggers.
*/
void jbd2_journal_set_triggers(struct buffer_head *bh,
struct jbd2_buffer_trigger_type *type)
{
struct journal_head *jh = bh2jh(bh);
jh->b_triggers = type;
}
void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
struct jbd2_buffer_trigger_type *triggers)
{
struct buffer_head *bh = jh2bh(jh);
if (!triggers || !triggers->t_commit)
return;
triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
}
void jbd2_buffer_abort_trigger(struct journal_head *jh,
struct jbd2_buffer_trigger_type *triggers)
{
if (!triggers || !triggers->t_abort)
return;
triggers->t_abort(triggers, jh2bh(jh));
}
/**
* int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.

View file

@ -12,6 +12,7 @@ obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
ocfs2-objs := \
alloc.o \
aops.o \
blockcheck.o \
buffer_head_io.o \
dcache.o \
dir.o \
@ -35,8 +36,14 @@ ocfs2-objs := \
sysfile.o \
uptodate.o \
ver.o \
quota_local.o \
quota_global.o \
xattr.o
ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
ocfs2-objs += acl.o
endif
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
ocfs2_stack_user-objs := stack_user.o

479
fs/ocfs2/acl.c Normal file
View file

@ -0,0 +1,479 @@
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* acl.c
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
*
* CREDITS:
* Lots of code in this file is copy from linux/fs/ext3/acl.c.
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "alloc.h"
#include "dlmglue.h"
#include "file.h"
#include "ocfs2_fs.h"
#include "xattr.h"
#include "acl.h"
/*
* Convert from xattr value to acl struct.
*/
static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
{
int n, count;
struct posix_acl *acl;
if (!value)
return NULL;
if (size < sizeof(struct posix_acl_entry))
return ERR_PTR(-EINVAL);
count = size / sizeof(struct posix_acl_entry);
if (count < 0)
return ERR_PTR(-EINVAL);
if (count == 0)
return NULL;
acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
for (n = 0; n < count; n++) {
struct ocfs2_acl_entry *entry =
(struct ocfs2_acl_entry *)value;
acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
value += sizeof(struct posix_acl_entry);
}
return acl;
}
/*
* Convert acl struct to xattr value.
*/
static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
{
struct ocfs2_acl_entry *entry = NULL;
char *ocfs2_acl;
size_t n;
*size = acl->a_count * sizeof(struct posix_acl_entry);
ocfs2_acl = kmalloc(*size, GFP_NOFS);
if (!ocfs2_acl)
return ERR_PTR(-ENOMEM);
entry = (struct ocfs2_acl_entry *)ocfs2_acl;
for (n = 0; n < acl->a_count; n++, entry++) {
entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
}
return ocfs2_acl;
}
static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
int type,
struct buffer_head *di_bh)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int name_index;
char *value = NULL;
struct posix_acl *acl;
int retval;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
switch (type) {
case ACL_TYPE_ACCESS:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
}
retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0);
if (retval > 0) {
value = kmalloc(retval, GFP_NOFS);
if (!value)
return ERR_PTR(-ENOMEM);
retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
"", value, retval);
}
if (retval > 0)
acl = ocfs2_acl_from_xattr(value, retval);
else if (retval == -ENODATA || retval == 0)
acl = NULL;
else
acl = ERR_PTR(retval);
kfree(value);
return acl;
}
/*
* Get posix acl.
*/
static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
int ret;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
ret = ocfs2_inode_lock(inode, &di_bh, 0);
if (ret < 0) {
mlog_errno(ret);
acl = ERR_PTR(ret);
return acl;
}
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
ocfs2_inode_unlock(inode, 0);
brelse(di_bh);
return acl;
}
/*
* Set the access or default ACL of an inode.
*/
static int ocfs2_set_acl(handle_t *handle,
struct inode *inode,
struct buffer_head *di_bh,
int type,
struct posix_acl *acl,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac)
{
int name_index;
void *value = NULL;
size_t size = 0;
int ret;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
switch (type) {
case ACL_TYPE_ACCESS:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
mode_t mode = inode->i_mode;
ret = posix_acl_equiv_mode(acl, &mode);
if (ret < 0)
return ret;
else {
inode->i_mode = mode;
if (ret == 0)
acl = NULL;
}
}
break;
case ACL_TYPE_DEFAULT:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
default:
return -EINVAL;
}
if (acl) {
value = ocfs2_acl_to_xattr(acl, &size);
if (IS_ERR(value))
return (int)PTR_ERR(value);
}
if (handle)
ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
"", value, size, 0,
meta_ac, data_ac);
else
ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0);
kfree(value);
return ret;
}
int ocfs2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
int ret = posix_acl_permission(inode, acl, mask);
posix_acl_release(acl);
return ret;
}
return -EAGAIN;
}
int ocfs2_acl_chmod(struct inode *inode)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl, *clone;
int ret;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return 0;
acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl) || !acl)
return PTR_ERR(acl);
clone = posix_acl_clone(acl, GFP_KERNEL);
posix_acl_release(acl);
if (!clone)
return -ENOMEM;
ret = posix_acl_chmod_masq(clone, inode->i_mode);
if (!ret)
ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
clone, NULL, NULL);
posix_acl_release(clone);
return ret;
}
/*
* Initialize the ACLs of a new inode. If parent directory has default ACL,
* then clone to new inode. Called from ocfs2_mknod.
*/
int ocfs2_init_acl(handle_t *handle,
struct inode *inode,
struct inode *dir,
struct buffer_head *di_bh,
struct buffer_head *dir_bh,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl = NULL;
int ret = 0;
if (!S_ISLNK(inode->i_mode)) {
if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
dir_bh);
if (IS_ERR(acl))
return PTR_ERR(acl);
}
if (!acl)
inode->i_mode &= ~current->fs->umask;
}
if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
struct posix_acl *clone;
mode_t mode;
if (S_ISDIR(inode->i_mode)) {
ret = ocfs2_set_acl(handle, inode, di_bh,
ACL_TYPE_DEFAULT, acl,
meta_ac, data_ac);
if (ret)
goto cleanup;
}
clone = posix_acl_clone(acl, GFP_NOFS);
ret = -ENOMEM;
if (!clone)
goto cleanup;
mode = inode->i_mode;
ret = posix_acl_create_masq(clone, &mode);
if (ret >= 0) {
inode->i_mode = mode;
if (ret > 0) {
ret = ocfs2_set_acl(handle, inode,
di_bh, ACL_TYPE_ACCESS,
clone, meta_ac, data_ac);
}
}
posix_acl_release(clone);
}
cleanup:
posix_acl_release(acl);
return ret;
}
static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
char *list,
size_t list_len,
const char *name,
size_t name_len)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
return size;
}
static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
char *list,
size_t list_len,
const char *name,
size_t name_len)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
return size;
}
static int ocfs2_xattr_get_acl(struct inode *inode,
int type,
void *buffer,
size_t size)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl;
int ret;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return -EOPNOTSUPP;
acl = ocfs2_get_acl(inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
return -ENODATA;
ret = posix_acl_to_xattr(acl, buffer, size);
posix_acl_release(acl);
return ret;
}
static int ocfs2_xattr_get_acl_access(struct inode *inode,
const char *name,
void *buffer,
size_t size)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
}
static int ocfs2_xattr_get_acl_default(struct inode *inode,
const char *name,
void *buffer,
size_t size)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
}
static int ocfs2_xattr_set_acl(struct inode *inode,
int type,
const void *value,
size_t size)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl;
int ret = 0;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return -EOPNOTSUPP;
if (!is_owner_or_cap(inode))
return -EPERM;
if (value) {
acl = posix_acl_from_xattr(value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
else if (acl) {
ret = posix_acl_valid(acl);
if (ret)
goto cleanup;
}
} else
acl = NULL;
ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
cleanup:
posix_acl_release(acl);
return ret;
}
static int ocfs2_xattr_set_acl_access(struct inode *inode,
const char *name,
const void *value,
size_t size,
int flags)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
}
static int ocfs2_xattr_set_acl_default(struct inode *inode,
const char *name,
const void *value,
size_t size,
int flags)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
}
struct xattr_handler ocfs2_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
.list = ocfs2_xattr_list_acl_access,
.get = ocfs2_xattr_get_acl_access,
.set = ocfs2_xattr_set_acl_access,
};
struct xattr_handler ocfs2_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
.list = ocfs2_xattr_list_acl_default,
.get = ocfs2_xattr_get_acl_default,
.set = ocfs2_xattr_set_acl_default,
};

58
fs/ocfs2/acl.h Normal file
View file

@ -0,0 +1,58 @@
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* acl.h
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_ACL_H
#define OCFS2_ACL_H
#include <linux/posix_acl_xattr.h>
struct ocfs2_acl_entry {
__le16 e_tag;
__le16 e_perm;
__le32 e_id;
};
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
extern int ocfs2_check_acl(struct inode *, int);
extern int ocfs2_acl_chmod(struct inode *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
struct buffer_head *, struct buffer_head *,
struct ocfs2_alloc_context *,
struct ocfs2_alloc_context *);
#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
#define ocfs2_check_acl NULL
static inline int ocfs2_acl_chmod(struct inode *inode)
{
return 0;
}
static inline int ocfs2_init_acl(handle_t *handle,
struct inode *inode,
struct inode *dir,
struct buffer_head *di_bh,
struct buffer_head *dir_bh,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac)
{
return 0;
}
#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
#endif /* OCFS2_ACL_H */

File diff suppressed because it is too large Load diff

View file

@ -45,7 +45,9 @@
*
* ocfs2_extent_tree contains info for the root of the b-tree, it must have a
* root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
* functions.
* functions. With metadata ecc, we now call different journal_access
* functions for each type of metadata, so it must have the
* root_journal_access function.
* ocfs2_extent_tree_operations abstract the normal operations we do for
* the root of extent b-tree.
*/
@ -54,6 +56,7 @@ struct ocfs2_extent_tree {
struct ocfs2_extent_tree_operations *et_ops;
struct buffer_head *et_root_bh;
struct ocfs2_extent_list *et_root_el;
ocfs2_journal_access_func et_root_journal_access;
void *et_object;
unsigned int et_max_leaf_clusters;
};
@ -68,10 +71,18 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
struct ocfs2_xattr_value_buf;
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
struct ocfs2_xattr_value_root *xv);
struct ocfs2_xattr_value_buf *vb);
/*
* Read an extent block into *bh. If *bh is NULL, a bh will be
* allocated. This is a cached read. The extent block will be validated
* with ocfs2_validate_extent_block().
*/
int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
struct buffer_head **bh);
struct ocfs2_alloc_context;
int ocfs2_insert_extent(struct ocfs2_super *osb,
@ -110,6 +121,11 @@ int ocfs2_remove_extent(struct inode *inode,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_extent_tree *et);
@ -167,10 +183,18 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
*/
struct ocfs2_cached_dealloc_ctxt {
struct ocfs2_per_slot_free_list *c_first_suballocator;
struct ocfs2_cached_block_free *c_global_allocator;
};
static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
{
c->c_first_suballocator = NULL;
c->c_global_allocator = NULL;
}
int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
u64 blkno, unsigned int bit);
static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
{
return c->c_global_allocator != NULL;
}
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt);

View file

@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_FILE_IO
#include <cluster/masklog.h>
@ -68,20 +69,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
goto bail;
}
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
(unsigned long long)le64_to_cpu(fe->i_blkno), 7,
fe->i_signature);
goto bail;
}
if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(fe->i_clusters))) {
mlog(ML_ERROR, "block offset is outside the allocated size: "
@ -262,7 +256,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@ -481,12 +475,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
ret = walk_page_buffers(handle,
page_buffers(page),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
if (ret < 0)
mlog_errno(ret);
}
@ -1072,15 +1060,8 @@ static void ocfs2_write_failure(struct inode *inode,
tmppage = wc->w_pages[i];
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode)) {
if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
@ -1531,8 +1512,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
goto out;
}
ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
ocfs2_commit_trans(osb, handle);
@ -1750,15 +1731,20 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
wc->w_handle = handle;
if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
ret = -EDQUOT;
goto out_commit;
}
/*
* We don't want this to fail in ocfs2_write_end(), so do it
* here.
*/
ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
goto out_quota;
}
/*
@ -1771,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
mmap_page);
if (ret) {
mlog_errno(ret);
goto out_commit;
goto out_quota;
}
ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
len);
if (ret) {
mlog_errno(ret);
goto out_commit;
goto out_quota;
}
if (data_ac)
@ -1790,6 +1776,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
*pagep = wc->w_target_page;
*fsdata = wc;
return 0;
out_quota:
if (clusters_to_alloc)
vfs_dq_free_space(inode,
ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
out_commit:
ocfs2_commit_trans(osb, handle);
@ -1919,15 +1909,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode)) {
if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
}

477
fs/ocfs2/blockcheck.c Normal file
View file

@ -0,0 +1,477 @@
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* blockcheck.c
*
* Checksum and ECC codes for the OCFS2 userspace library.
*
* Copyright (C) 2006, 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License, version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/crc32.h>
#include <linux/buffer_head.h>
#include <linux/bitops.h>
#include <asm/byteorder.h>
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "blockcheck.h"
/*
* We use the following conventions:
*
* d = # data bits
* p = # parity bits
* c = # total code bits (d + p)
*/
/*
* Calculate the bit offset in the hamming code buffer based on the bit's
* offset in the data buffer. Since the hamming code reserves all
* power-of-two bits for parity, the data bit number and the code bit
* number are offest by all the parity bits beforehand.
*
* Recall that bit numbers in hamming code are 1-based. This function
* takes the 0-based data bit from the caller.
*
* An example. Take bit 1 of the data buffer. 1 is a power of two (2^0),
* so it's a parity bit. 2 is a power of two (2^1), so it's a parity bit.
* 3 is not a power of two. So bit 1 of the data buffer ends up as bit 3
* in the code buffer.
*
* The caller can pass in *p if it wants to keep track of the most recent
* number of parity bits added. This allows the function to start the
* calculation at the last place.
*/
static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
{
unsigned int b, p = 0;
/*
* Data bits are 0-based, but we're talking code bits, which
* are 1-based.
*/
b = i + 1;
/* Use the cache if it is there */
if (p_cache)
p = *p_cache;
b += p;
/*
* For every power of two below our bit number, bump our bit.
*
* We compare with (b + 1) because we have to compare with what b
* would be _if_ it were bumped up by the parity bit. Capice?
*
* p is set above.
*/
for (; (1 << p) < (b + 1); p++)
b++;
if (p_cache)
*p_cache = p;
return b;
}
/*
* This is the low level encoder function. It can be called across
* multiple hunks just like the crc32 code. 'd' is the number of bits
* _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
* two 512B buffers, you would do it like so:
*
* parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
* parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
*
* If you just have one buffer, use ocfs2_hamming_encode_block().
*/
u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
{
unsigned int i, b, p = 0;
BUG_ON(!d);
/*
* b is the hamming code bit number. Hamming code specifies a
* 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is
* for the algorithm.
*
* The i++ in the for loop is so that the start offset passed
* to ocfs2_find_next_bit_set() is one greater than the previously
* found bit.
*/
for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
{
/*
* i is the offset in this hunk, nr + i is the total bit
* offset.
*/
b = calc_code_bit(nr + i, &p);
/*
* Data bits in the resultant code are checked by
* parity bits that are part of the bit number
* representation. Huh?
*
* <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
* In other words, the parity bit at position 2^k
* checks bits in positions having bit k set in
* their binary representation. Conversely, for
* instance, bit 13, i.e. 1101(2), is checked by
* bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
* </wikipedia>
*
* Note that 'k' is the _code_ bit number. 'b' in
* our loop.
*/
parity ^= b;
}
/* While the data buffer was treated as little endian, the
* return value is in host endian. */
return parity;
}
u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
{
return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
}
/*
* Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
* offset of the current hunk. If bit to be fixed is not part of the
* current hunk, this does nothing.
*
* If you only have one hunk, use ocfs2_hamming_fix_block().
*/
void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
unsigned int fix)
{
unsigned int i, b;
BUG_ON(!d);
/*
* If the bit to fix has an hweight of 1, it's a parity bit. One
* busted parity bit is its own error. Nothing to do here.
*/
if (hweight32(fix) == 1)
return;
/*
* nr + d is the bit right past the data hunk we're looking at.
* If fix after that, nothing to do
*/
if (fix >= calc_code_bit(nr + d, NULL))
return;
/*
* nr is the offset in the data hunk we're starting at. Let's
* start b at the offset in the code buffer. See hamming_encode()
* for a more detailed description of 'b'.
*/
b = calc_code_bit(nr, NULL);
/* If the fix is before this hunk, nothing to do */
if (fix < b)
return;
for (i = 0; i < d; i++, b++)
{
/* Skip past parity bits */
while (hweight32(b) == 1)
b++;
/*
* i is the offset in this data hunk.
* nr + i is the offset in the total data buffer.
* b is the offset in the total code buffer.
*
* Thus, when b == fix, bit i in the current hunk needs
* fixing.
*/
if (b == fix)
{
if (ocfs2_test_bit(i, data))
ocfs2_clear_bit(i, data);
else
ocfs2_set_bit(i, data);
break;
}
}
}
void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
unsigned int fix)
{
ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
}
/*
* This function generates check information for a block.
* data is the block to be checked. bc is a pointer to the
* ocfs2_block_check structure describing the crc32 and the ecc.
*
* bc should be a pointer inside data, as the function will
* take care of zeroing it before calculating the check information. If
* bc does not point inside data, the caller must make sure any inline
* ocfs2_block_check structures are zeroed.
*
* The data buffer must be in on-disk endian (little endian for ocfs2).
* bc will be filled with little-endian values and will be ready to go to
* disk.
*/
void ocfs2_block_check_compute(void *data, size_t blocksize,
struct ocfs2_block_check *bc)
{
u32 crc;
u32 ecc;
memset(bc, 0, sizeof(struct ocfs2_block_check));
crc = crc32_le(~0, data, blocksize);
ecc = ocfs2_hamming_encode_block(data, blocksize);
/*
* No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
* larger than 16 bits.
*/
BUG_ON(ecc > USHORT_MAX);
bc->bc_crc32e = cpu_to_le32(crc);
bc->bc_ecc = cpu_to_le16((u16)ecc);
}
/*
* This function validates existing check information. Like _compute,
* the function will take care of zeroing bc before calculating check codes.
* If bc is not a pointer inside data, the caller must have zeroed any
* inline ocfs2_block_check structures.
*
* Again, the data passed in should be the on-disk endian.
*/
int ocfs2_block_check_validate(void *data, size_t blocksize,
struct ocfs2_block_check *bc)
{
int rc = 0;
struct ocfs2_block_check check;
u32 crc, ecc;
check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
check.bc_ecc = le16_to_cpu(bc->bc_ecc);
memset(bc, 0, sizeof(struct ocfs2_block_check));
/* Fast path - if the crc32 validates, we're good to go */
crc = crc32_le(~0, data, blocksize);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR,
"CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
/* Ok, try ECC fixups */
ecc = ocfs2_hamming_encode_block(data, blocksize);
ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
/* And check the crc32 again */
crc = crc32_le(~0, data, blocksize);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
rc = -EIO;
out:
bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
bc->bc_ecc = cpu_to_le16(check.bc_ecc);
return rc;
}
/*
* This function generates check information for a list of buffer_heads.
* bhs is the blocks to be checked. bc is a pointer to the
* ocfs2_block_check structure describing the crc32 and the ecc.
*
* bc should be a pointer inside data, as the function will
* take care of zeroing it before calculating the check information. If
* bc does not point inside data, the caller must make sure any inline
* ocfs2_block_check structures are zeroed.
*
* The data buffer must be in on-disk endian (little endian for ocfs2).
* bc will be filled with little-endian values and will be ready to go to
* disk.
*/
void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
int i;
u32 crc, ecc;
BUG_ON(nr < 0);
if (!nr)
return;
memset(bc, 0, sizeof(struct ocfs2_block_check));
for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
/*
* The number of bits in a buffer is obviously b_size*8.
* The offset of this buffer is b_size*i, so the bit offset
* of this buffer is b_size*8*i.
*/
ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
bhs[i]->b_size * 8,
bhs[i]->b_size * 8 * i);
}
/*
* No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
* larger than 16 bits.
*/
BUG_ON(ecc > USHORT_MAX);
bc->bc_crc32e = cpu_to_le32(crc);
bc->bc_ecc = cpu_to_le16((u16)ecc);
}
/*
* This function validates existing check information on a list of
* buffer_heads. Like _compute_bhs, the function will take care of
* zeroing bc before calculating check codes. If bc is not a pointer
* inside data, the caller must have zeroed any inline
* ocfs2_block_check structures.
*
* Again, the data passed in should be the on-disk endian.
*/
int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
int i, rc = 0;
struct ocfs2_block_check check;
u32 crc, ecc, fix;
BUG_ON(nr < 0);
if (!nr)
return 0;
check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
check.bc_ecc = le16_to_cpu(bc->bc_ecc);
memset(bc, 0, sizeof(struct ocfs2_block_check));
/* Fast path - if the crc32 validates, we're good to go */
for (i = 0, crc = ~0; i < nr; i++)
crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR,
"CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
/* Ok, try ECC fixups */
for (i = 0, ecc = 0; i < nr; i++) {
/*
* The number of bits in a buffer is obviously b_size*8.
* The offset of this buffer is b_size*i, so the bit offset
* of this buffer is b_size*8*i.
*/
ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
bhs[i]->b_size * 8,
bhs[i]->b_size * 8 * i);
}
fix = ecc ^ check.bc_ecc;
for (i = 0; i < nr; i++) {
/*
* Try the fix against each buffer. It will only affect
* one of them.
*/
ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
bhs[i]->b_size * 8 * i, fix);
}
/* And check the crc32 again */
for (i = 0, crc = ~0; i < nr; i++)
crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
rc = -EIO;
out:
bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
bc->bc_ecc = cpu_to_le16(check.bc_ecc);
return rc;
}
/*
* These are the main API. They check the superblock flag before
* calling the underlying operations.
*
* They expect the buffer(s) to be in disk format.
*/
void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc)
{
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
ocfs2_block_check_compute(data, sb->s_blocksize, bc);
}
int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc)
{
int rc = 0;
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
return rc;
}
void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
ocfs2_block_check_compute_bhs(bhs, nr, bc);
}
int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
int rc = 0;
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
return rc;
}

82
fs/ocfs2/blockcheck.h Normal file
View file

@ -0,0 +1,82 @@
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* blockcheck.h
*
* Checksum and ECC codes for the OCFS2 userspace library.
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License, version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_BLOCKCHECK_H
#define OCFS2_BLOCKCHECK_H
/* High level block API */
void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc);
int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc);
void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
/* Lower level API */
void ocfs2_block_check_compute(void *data, size_t blocksize,
struct ocfs2_block_check *bc);
int ocfs2_block_check_validate(void *data, size_t blocksize,
struct ocfs2_block_check *bc);
void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
/*
* Hamming code functions
*/
/*
* Encoding hamming code parity bits for a buffer.
*
* This is the low level encoder function. It can be called across
* multiple hunks just like the crc32 code. 'd' is the number of bits
* _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
* two 512B buffers, you would do it like so:
*
* parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
* parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
*
* If you just have one buffer, use ocfs2_hamming_encode_block().
*/
u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d,
unsigned int nr);
/*
* Fix a buffer with a bit error. The 'fix' is the original parity
* xor'd with the parity calculated now.
*
* Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
* offset of the current hunk. If bit to be fixed is not part of the
* current hunk, this does nothing.
*
* If you only have one buffer, use ocfs2_hamming_fix_block().
*/
void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
unsigned int fix);
/* Convenience wrappers for a single buffer of data */
extern u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize);
extern void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
unsigned int fix);
#endif

View file

@ -39,6 +39,18 @@
#include "buffer_head_io.h"
/*
* Bits on bh->b_state used by ocfs2.
*
* These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart.
*/
enum ocfs2_state_bits {
BH_NeedsValidate = BH_JBDPrivateStart,
};
/* Expand the magic b_state functions */
BUFFER_FNS(NeedsValidate, needs_validate);
int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
struct inode *inode)
{
@ -166,7 +178,9 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
}
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags)
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
int i, ignore_cache = 0;
@ -298,6 +312,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
if (validate)
set_buffer_needs_validate(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
continue;
@ -328,6 +344,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
bhs[i] = NULL;
continue;
}
if (buffer_needs_validate(bh)) {
/* We never set NeedsValidate if the
* buffer was held by the journal, so
* that better not have changed */
BUG_ON(buffer_jbd(bh));
clear_buffer_needs_validate(bh);
status = validate(inode->i_sb, bh);
if (status) {
put_bh(bh);
bhs[i] = NULL;
continue;
}
}
}
/* Always set the buffer in the cache, even if it was

View file

@ -31,21 +31,24 @@
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
int uptodate);
static inline int ocfs2_read_block(struct inode *inode,
u64 off,
struct buffer_head **bh);
int ocfs2_write_block(struct ocfs2_super *osb,
struct buffer_head *bh,
struct inode *inode);
int ocfs2_read_blocks(struct inode *inode,
u64 block,
int nr,
struct buffer_head *bhs[],
int flags);
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]);
/*
* If not NULL, validate() will be called on a buffer that is freshly
* read from disk. It will not be called if the buffer was in cache.
* Note that if validate() is being used for this buffer, it needs to
* be set even for a READAHEAD call, as it marks the buffer for later
* validation.
*/
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh));
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh);
@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct inode *inode, u64 off,
struct buffer_head **bh)
struct buffer_head **bh,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
goto bail;
}
status = ocfs2_read_blocks(inode, off, 1, bh, 0);
status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
bail:
return status;

View file

@ -110,6 +110,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(QUORUM),
define_mask(EXPORT),
define_mask(XATTR),
define_mask(QUOTA),
define_mask(ERROR),
define_mask(NOTICE),
define_mask(KTHREAD),

View file

@ -113,6 +113,7 @@
#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */

View file

@ -40,6 +40,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@ -47,6 +48,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "blockcheck.h"
#include "dir.h"
#include "dlmglue.h"
#include "extent_map.h"
@ -82,47 +84,72 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
static struct buffer_head *ocfs2_bread(struct inode *inode,
int block, int *err, int reada)
/*
* These are distinct checks because future versions of the file system will
* want to have a trailing dirent structure independent of indexing.
*/
static int ocfs2_dir_has_trailer(struct inode *dir)
{
struct buffer_head *bh = NULL;
int tmperr;
u64 p_blkno;
int readflags = 0;
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
if (reada)
readflags |= OCFS2_BH_READAHEAD;
return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb));
}
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
i_size_read(inode)) {
BUG_ON(!reada);
return NULL;
}
static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb)
{
return ocfs2_meta_ecc(osb);
}
down_read(&OCFS2_I(inode)->ip_alloc_sem);
tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (tmperr < 0) {
mlog_errno(tmperr);
goto fail;
}
static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
{
return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
}
tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
if (tmperr < 0)
goto fail;
#define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
tmperr = 0;
/* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
* them more consistent? */
struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
void *data)
{
char *p = data;
*err = 0;
return bh;
p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
return (struct ocfs2_dir_block_trailer *)p;
}
fail:
brelse(bh);
bh = NULL;
/*
* XXX: This is executed once on every dirent. We should consider optimizing
* it.
*/
static int ocfs2_skip_dir_trailer(struct inode *dir,
struct ocfs2_dir_entry *de,
unsigned long offset,
unsigned long blklen)
{
unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
*err = -EIO;
return NULL;
if (!ocfs2_dir_has_trailer(dir))
return 0;
if (offset != toff)
return 0;
return 1;
}
static void ocfs2_init_dir_trailer(struct inode *inode,
struct buffer_head *bh)
{
struct ocfs2_dir_block_trailer *trailer;
trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
trailer->db_compat_rec_len =
cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
}
/*
@ -231,7 +258,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
ret = ocfs2_read_inode_block(dir, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@ -250,6 +277,108 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
return NULL;
}
static int ocfs2_validate_dir_block(struct super_block *sb,
struct buffer_head *bh)
{
int rc;
struct ocfs2_dir_block_trailer *trailer =
ocfs2_trailer_from_bh(bh, sb);
/*
* We don't validate dirents here, that's handled
* in-place when the code walks them.
*/
mlog(0, "Validating dirblock %llu\n",
(unsigned long long)bh->b_blocknr);
BUG_ON(!buffer_uptodate(bh));
/*
* If the ecc fails, we return the error but otherwise
* leave the filesystem running. We know any error is
* local to this block.
*
* Note that we are safe to call this even if the directory
* doesn't have a trailer. Filesystems without metaecc will do
* nothing, and filesystems with it will have one.
*/
rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
if (rc)
mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
(unsigned long long)bh->b_blocknr);
return rc;
}
/*
* This function forces all errors to -EIO for consistency with its
* predecessor, ocfs2_bread(). We haven't audited what returning the
* real error codes would do to callers. We log the real codes with
* mlog_errno() before we squash them.
*/
static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
struct buffer_head **bh, int flags)
{
int rc = 0;
struct buffer_head *tmp = *bh;
struct ocfs2_dir_block_trailer *trailer;
rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
ocfs2_validate_dir_block);
if (rc) {
mlog_errno(rc);
goto out;
}
/*
* We check the trailer here rather than in
* ocfs2_validate_dir_block() because that function doesn't have
* the inode to test.
*/
if (!(flags & OCFS2_BH_READAHEAD) &&
ocfs2_dir_has_trailer(inode)) {
trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb);
if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
rc = -EINVAL;
ocfs2_error(inode->i_sb,
"Invalid dirblock #%llu: "
"signature = %.*s\n",
(unsigned long long)tmp->b_blocknr, 7,
trailer->db_signature);
goto out;
}
if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) {
rc = -EINVAL;
ocfs2_error(inode->i_sb,
"Directory block #%llu has an invalid "
"db_blkno of %llu",
(unsigned long long)tmp->b_blocknr,
(unsigned long long)le64_to_cpu(trailer->db_blkno));
goto out;
}
if (le64_to_cpu(trailer->db_parent_dinode) !=
OCFS2_I(inode)->ip_blkno) {
rc = -EINVAL;
ocfs2_error(inode->i_sb,
"Directory block #%llu on dinode "
"#%llu has an invalid parent_dinode "
"of %llu",
(unsigned long long)tmp->b_blocknr,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)le64_to_cpu(trailer->db_blkno));
goto out;
}
}
/* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
if (!*bh)
*bh = tmp;
out:
return rc ? -EIO : 0;
}
static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
@ -296,15 +425,17 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
}
num++;
bh = ocfs2_bread(dir, b++, &err, 1);
bh = NULL;
err = ocfs2_read_dir_block(dir, b++, &bh,
OCFS2_BH_READAHEAD);
bh_use[ra_max] = bh;
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
if (ocfs2_read_block(dir, block, &bh)) {
if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
/* read error, skip block & hope for the best.
* ocfs2_read_block() has released the bh. */
* ocfs2_read_dir_block() has released the bh. */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
@ -381,14 +512,18 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
struct inode *new_entry_inode)
{
int ret;
ocfs2_journal_access_func access = ocfs2_journal_access_db;
/*
* The same code works fine for both inline-data and extent
* based directories, so no need to split this up.
* based directories, so no need to split this up. The only
* difference is the journal_access function.
*/
ret = ocfs2_journal_access(handle, dir, de_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
access = ocfs2_journal_access_di;
ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
@ -410,9 +545,13 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
{
struct ocfs2_dir_entry *de, *pde;
int i, status = -ENOENT;
ocfs2_journal_access_func access = ocfs2_journal_access_db;
mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
access = ocfs2_journal_access_di;
i = 0;
pde = NULL;
de = (struct ocfs2_dir_entry *) first_de;
@ -423,8 +562,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
goto bail;
}
if (de == de_del) {
status = ocfs2_journal_access(handle, dir, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = access(handle, dir, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
status = -EIO;
mlog_errno(status);
@ -458,7 +597,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
ret = ocfs2_read_inode_block(dir, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@ -576,6 +715,16 @@ int __ocfs2_add_entry(handle_t *handle,
goto bail;
}
/* We're guaranteed that we should have space, so we
* can't possibly have hit the trailer...right? */
mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
"Hit dir trailer trying to insert %.*s "
"(namelen %d) into directory %llu. "
"offset is %lu, trailer offset is %d\n",
namelen, name, namelen,
(unsigned long long)parent_fe_bh->b_blocknr,
offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
if (ocfs2_dirent_would_fit(de, rec_len)) {
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
@ -584,8 +733,14 @@ int __ocfs2_add_entry(handle_t *handle,
goto bail;
}
status = ocfs2_journal_access(handle, dir, insert_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (insert_bh == parent_fe_bh)
status = ocfs2_journal_access_di(handle, dir,
insert_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
else
status = ocfs2_journal_access_db(handle, dir,
insert_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
/* By now the buffer is marked for journaling */
offset += le16_to_cpu(de->rec_len);
if (le64_to_cpu(de->inode)) {
@ -611,6 +766,7 @@ int __ocfs2_add_entry(handle_t *handle,
retval = 0;
goto bail;
}
offset += le16_to_cpu(de->rec_len);
de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
}
@ -636,7 +792,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
struct ocfs2_inline_data *data;
struct ocfs2_dir_entry *de;
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
@ -724,7 +880,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
int i, stored;
struct buffer_head * bh, * tmp;
struct ocfs2_dir_entry * de;
int err;
struct super_block * sb = inode->i_sb;
unsigned int ra_sectors = 16;
@ -735,12 +890,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
while (!error && !stored && *f_pos < i_size_read(inode)) {
blk = (*f_pos) >> sb->s_blocksize_bits;
bh = ocfs2_bread(inode, blk, &err, 0);
if (!bh) {
mlog(ML_ERROR,
"directory #%llu contains a hole at offset %lld\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
*f_pos);
if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
/* Skip the corrupt dirblock and keep trying */
*f_pos += sb->s_blocksize - offset;
continue;
}
@ -754,8 +905,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
|| (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) {
tmp = ocfs2_bread(inode, ++blk, &err, 1);
brelse(tmp);
tmp = NULL;
if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
OCFS2_BH_READAHEAD))
brelse(tmp);
}
last_ra_blk = blk;
ra_sectors = 8;
@ -828,6 +981,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
}
offset = 0;
brelse(bh);
bh = NULL;
}
stored = 0;
@ -1050,9 +1204,15 @@ int ocfs2_empty_dir(struct inode *inode)
return !priv.seen_other;
}
static void ocfs2_fill_initial_dirents(struct inode *inode,
struct inode *parent,
char *start, unsigned int size)
/*
* Fills "." and ".." dirents in a new directory block. Returns dirent for
* "..", which might be used during creation of a directory with a trailing
* header. It is otherwise safe to ignore the return code.
*/
static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
struct inode *parent,
char *start,
unsigned int size)
{
struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
@ -1069,6 +1229,8 @@ static void ocfs2_fill_initial_dirents(struct inode *inode,
de->name_len = 2;
strcpy(de->name, "..");
ocfs2_set_de_type(de, S_IFDIR);
return de;
}
/*
@ -1086,8 +1248,8 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
struct ocfs2_inline_data *data = &di->id2.i_data;
unsigned int size = le16_to_cpu(data->id_count);
ret = ocfs2_journal_access(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
@ -1121,10 +1283,15 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac)
{
int status;
unsigned int size = osb->sb->s_blocksize;
struct buffer_head *new_bh = NULL;
struct ocfs2_dir_entry *de;
mlog_entry_void();
if (ocfs2_supports_dir_trailer(osb))
size = ocfs2_dir_trailer_blk_off(parent->i_sb);
status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
data_ac, NULL, &new_bh);
if (status < 0) {
@ -1134,16 +1301,17 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
ocfs2_set_new_buffer_uptodate(inode, new_bh);
status = ocfs2_journal_access(handle, inode, new_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
status = ocfs2_journal_access_db(handle, inode, new_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
memset(new_bh->b_data, 0, osb->sb->s_blocksize);
ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data,
osb->sb->s_blocksize);
de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
if (ocfs2_supports_dir_trailer(osb))
ocfs2_init_dir_trailer(inode, new_bh);
status = ocfs2_journal_dirty(handle, new_bh);
if (status < 0) {
@ -1184,13 +1352,27 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
data_ac);
}
/*
* Expand rec_len of the rightmost dirent in a directory block so that it
* contains the end of our valid space for dirents. We do this during
* expansion from an inline directory to one with extents. The first dir block
* in that case is taken from the inline data portion of the inode block.
*
* We add the dir trailer if this filesystem wants it.
*/
static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
unsigned int new_size)
struct super_block *sb)
{
struct ocfs2_dir_entry *de;
struct ocfs2_dir_entry *prev_de;
char *de_buf, *limit;
unsigned int bytes = new_size - old_size;
unsigned int new_size = sb->s_blocksize;
unsigned int bytes;
if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
new_size = ocfs2_dir_trailer_blk_off(sb);
bytes = new_size - old_size;
limit = start + old_size;
de_buf = start;
@ -1216,9 +1398,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
unsigned int blocks_wanted,
struct buffer_head **first_block_bh)
{
int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
u32 alloc, bit_off, len;
struct super_block *sb = dir->i_sb;
int ret, credits = ocfs2_inline_to_extents_credits(sb);
u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(dir);
@ -1227,6 +1409,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
handle_t *handle;
struct ocfs2_extent_tree et;
int did_quota = 0;
ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
@ -1264,6 +1447,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
goto out_sem;
}
if (vfs_dq_alloc_space_nodirty(dir,
ocfs2_clusters_to_bytes(osb->sb, alloc))) {
ret = -EDQUOT;
goto out_commit;
}
did_quota = 1;
/*
* Try to claim as many clusters as the bitmap can give though
* if we only get one now, that's enough to continue. The rest
@ -1290,8 +1479,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
ret = ocfs2_journal_access(handle, dir, dirdata_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret) {
mlog_errno(ret);
goto out_commit;
@ -1300,8 +1489,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
memset(dirdata_bh->b_data + i_size_read(dir), 0,
sb->s_blocksize - i_size_read(dir));
ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir),
sb->s_blocksize);
ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb);
if (ocfs2_supports_dir_trailer(osb))
ocfs2_init_dir_trailer(dir, dirdata_bh);
ret = ocfs2_journal_dirty(handle, dirdata_bh);
if (ret) {
@ -1317,8 +1507,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
* We let the later dirent insert modify c/mtime - to the user
* the data hasn't changed.
*/
ret = ocfs2_journal_access(handle, dir, di_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
ret = ocfs2_journal_access_di(handle, dir, di_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret) {
mlog_errno(ret);
goto out_commit;
@ -1386,6 +1576,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
dirdata_bh = NULL;
out_commit:
if (ret < 0 && did_quota)
vfs_dq_free_space_nodirty(dir,
ocfs2_clusters_to_bytes(osb->sb, 2));
ocfs2_commit_trans(osb, handle);
out_sem:
@ -1410,7 +1603,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct buffer_head **new_bh)
{
int status;
int extend;
int extend, did_quota = 0;
u64 p_blkno, v_blkno;
spin_lock(&OCFS2_I(dir)->ip_lock);
@ -1420,6 +1613,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
if (extend) {
u32 offset = OCFS2_I(dir)->ip_clusters;
if (vfs_dq_alloc_space_nodirty(dir,
ocfs2_clusters_to_bytes(sb, 1))) {
status = -EDQUOT;
goto bail;
}
did_quota = 1;
status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
@ -1445,6 +1645,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
}
status = 0;
bail:
if (did_quota && status < 0)
vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
mlog_exit(status);
return status;
}
@ -1569,16 +1771,22 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
ocfs2_set_new_buffer_uptodate(dir, new_bh);
status = ocfs2_journal_access(handle, dir, new_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
status = ocfs2_journal_access_db(handle, dir, new_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
memset(new_bh->b_data, 0, sb->s_blocksize);
de = (struct ocfs2_dir_entry *) new_bh->b_data;
de->inode = 0;
de->rec_len = cpu_to_le16(sb->s_blocksize);
if (ocfs2_dir_has_trailer(dir)) {
de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
ocfs2_init_dir_trailer(dir, new_bh);
} else {
de->rec_len = cpu_to_le16(sb->s_blocksize);
}
status = ocfs2_journal_dirty(handle, new_bh);
if (status < 0) {
mlog_errno(status);
@ -1620,11 +1828,21 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
unsigned int *blocks_wanted)
{
int ret;
struct super_block *sb = dir->i_sb;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_dir_entry *de, *last_de = NULL;
char *de_buf, *limit;
unsigned long offset = 0;
unsigned int rec_len, new_rec_len;
unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
/*
* This calculates how many free bytes we'd have in block zero, should
* this function force expansion to an extent tree.
*/
if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
else
free_space = dir->i_sb->s_blocksize - i_size_read(dir);
de_buf = di->id2.i_data.id_data;
limit = de_buf + i_size_read(dir);
@ -1641,6 +1859,11 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
ret = -EEXIST;
goto out;
}
/*
* No need to check for a trailing dirent record here as
* they're not used for inline dirs.
*/
if (ocfs2_dirent_would_fit(de, rec_len)) {
/* Ok, we found a spot. Return this bh and let
* the caller actually fill it in. */
@ -1661,7 +1884,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
* dirent can be found.
*/
*blocks_wanted = 1;
new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir));
new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
*blocks_wanted = 2;
@ -1679,9 +1902,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
struct ocfs2_dir_entry *de;
struct super_block *sb = dir->i_sb;
int status;
int blocksize = dir->i_sb->s_blocksize;
bh = ocfs2_bread(dir, 0, &status, 0);
if (!bh) {
status = ocfs2_read_dir_block(dir, 0, &bh, 0);
if (status) {
mlog_errno(status);
goto bail;
}
@ -1702,11 +1926,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
status = -ENOSPC;
goto bail;
}
bh = ocfs2_bread(dir,
offset >> sb->s_blocksize_bits,
&status,
0);
if (!bh) {
status = ocfs2_read_dir_block(dir,
offset >> sb->s_blocksize_bits,
&bh, 0);
if (status) {
mlog_errno(status);
goto bail;
}
@ -1721,6 +1944,11 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
status = -EEXIST;
goto bail;
}
if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
blocksize))
goto next;
if (ocfs2_dirent_would_fit(de, rec_len)) {
/* Ok, we found a spot. Return this bh and let
* the caller actually fill it in. */
@ -1729,6 +1957,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
status = 0;
goto bail;
}
next:
offset += le16_to_cpu(de->rec_len);
de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
}

View file

@ -83,4 +83,6 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac);
struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
void *data);
#endif /* OCFS2_DIR_H */

View file

@ -275,6 +275,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
struct list_head *iter, *head=NULL;
u64 cookie;
u32 flags;
u8 node;
if (!dlm_grab(dlm)) {
dlm_error(DLM_REJECTED);
@ -286,18 +287,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
name = past->name;
locklen = past->namelen;
cookie = be64_to_cpu(past->cookie);
cookie = past->cookie;
flags = be32_to_cpu(past->flags);
node = past->node_idx;
if (locklen > DLM_LOCKID_NAME_MAX) {
ret = DLM_IVBUFLEN;
mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n");
mlog(ML_ERROR, "Invalid name length (%d) in proxy ast "
"handler!\n", locklen);
goto leave;
}
if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
(LKM_PUT_LVB|LKM_GET_LVB)) {
mlog(ML_ERROR, "both PUT and GET lvb specified\n");
mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n",
flags);
ret = DLM_BADARGS;
goto leave;
}
@ -310,22 +314,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
if (past->type != DLM_AST &&
past->type != DLM_BAST) {
mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
"name=%.*s\n", past->type,
dlm_get_lock_cookie_node(cookie),
dlm_get_lock_cookie_seq(cookie),
locklen, name);
"name=%.*s, node=%u\n", past->type,
dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, node);
ret = DLM_IVLOCKID;
goto leave;
}
res = dlm_lookup_lockres(dlm, name, locklen);
if (!res) {
mlog(0, "got %sast for unknown lockres! "
"cookie=%u:%llu, name=%.*s, namelen=%u\n",
past->type == DLM_AST ? "" : "b",
dlm_get_lock_cookie_node(cookie),
dlm_get_lock_cookie_seq(cookie),
locklen, name, locklen);
mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, "
"name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"),
dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, node);
ret = DLM_IVLOCKID;
goto leave;
}
@ -337,12 +340,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
spin_lock(&res->spinlock);
if (res->state & DLM_LOCK_RES_RECOVERING) {
mlog(0, "responding with DLM_RECOVERING!\n");
mlog(0, "Responding with DLM_RECOVERING!\n");
ret = DLM_RECOVERING;
goto unlock_out;
}
if (res->state & DLM_LOCK_RES_MIGRATING) {
mlog(0, "responding with DLM_MIGRATING!\n");
mlog(0, "Responding with DLM_MIGRATING!\n");
ret = DLM_MIGRATING;
goto unlock_out;
}
@ -351,7 +354,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
lock = NULL;
list_for_each(iter, head) {
lock = list_entry (iter, struct dlm_lock, list);
if (be64_to_cpu(lock->ml.cookie) == cookie)
if (lock->ml.cookie == cookie)
goto do_ast;
}
@ -363,15 +366,15 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
list_for_each(iter, head) {
lock = list_entry (iter, struct dlm_lock, list);
if (be64_to_cpu(lock->ml.cookie) == cookie)
if (lock->ml.cookie == cookie)
goto do_ast;
}
mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
"name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
dlm_get_lock_cookie_node(cookie),
dlm_get_lock_cookie_seq(cookie),
locklen, name, locklen);
mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
"node=%u\n", past->type == DLM_AST ? "" : "b",
dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, node);
ret = DLM_NORMAL;
unlock_out:
@ -383,8 +386,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
if (past->type == DLM_AST) {
/* do not alter lock refcount. switching lists. */
list_move_tail(&lock->list, &res->granted);
mlog(0, "ast: adding to granted list... type=%d, "
"convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
mlog(0, "ast: Adding to granted list... type=%d, "
"convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
if (lock->ml.convert_type != LKM_IVMODE) {
lock->ml.type = lock->ml.convert_type;
lock->ml.convert_type = LKM_IVMODE;
@ -408,7 +411,6 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
dlm_do_local_bast(dlm, res, lock, past->blocked_type);
leave:
if (res)
dlm_lockres_put(res);

View file

@ -140,6 +140,7 @@ struct dlm_ctxt
unsigned int purge_count;
spinlock_t spinlock;
spinlock_t ast_lock;
spinlock_t track_lock;
char *name;
u8 node_num;
u32 key;
@ -316,6 +317,8 @@ struct dlm_lock_resource
* put on a list for the dlm thread to run. */
unsigned long last_used;
struct dlm_ctxt *dlm;
unsigned migration_pending:1;
atomic_t asts_reserved;
spinlock_t spinlock;

View file

@ -630,43 +630,38 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
{
struct debug_lockres *dl = m->private;
struct dlm_ctxt *dlm = dl->dl_ctxt;
struct dlm_lock_resource *oldres = dl->dl_res;
struct dlm_lock_resource *res = NULL;
struct list_head *track_list;
spin_lock(&dlm->spinlock);
spin_lock(&dlm->track_lock);
if (oldres)
track_list = &oldres->tracking;
else
track_list = &dlm->tracking_list;
if (dl->dl_res) {
list_for_each_entry(res, &dl->dl_res->tracking, tracking) {
if (dl->dl_res) {
dlm_lockres_put(dl->dl_res);
dl->dl_res = NULL;
}
if (&res->tracking == &dlm->tracking_list) {
mlog(0, "End of list found, %p\n", res);
dl = NULL;
break;
}
list_for_each_entry(res, track_list, tracking) {
if (&res->tracking == &dlm->tracking_list)
res = NULL;
else
dlm_lockres_get(res);
dl->dl_res = res;
break;
}
} else {
if (!list_empty(&dlm->tracking_list)) {
list_for_each_entry(res, &dlm->tracking_list, tracking)
break;
dlm_lockres_get(res);
dl->dl_res = res;
} else
dl = NULL;
break;
}
spin_unlock(&dlm->track_lock);
if (dl) {
spin_lock(&dl->dl_res->spinlock);
dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1);
spin_unlock(&dl->dl_res->spinlock);
}
if (oldres)
dlm_lockres_put(oldres);
spin_unlock(&dlm->spinlock);
dl->dl_res = res;
if (res) {
spin_lock(&res->spinlock);
dump_lockres(res, dl->dl_buf, dl->dl_len - 1);
spin_unlock(&res->spinlock);
} else
dl = NULL;
/* passed to seq_show */
return dl;
}

View file

@ -1550,6 +1550,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
spin_lock_init(&dlm->spinlock);
spin_lock_init(&dlm->master_lock);
spin_lock_init(&dlm->ast_lock);
spin_lock_init(&dlm->track_lock);
INIT_LIST_HEAD(&dlm->list);
INIT_LIST_HEAD(&dlm->dirty_list);
INIT_LIST_HEAD(&dlm->reco.resources);

View file

@ -505,8 +505,10 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
static void dlm_lockres_release(struct kref *kref)
{
struct dlm_lock_resource *res;
struct dlm_ctxt *dlm;
res = container_of(kref, struct dlm_lock_resource, refs);
dlm = res->dlm;
/* This should not happen -- all lockres' have a name
* associated with them at init time. */
@ -515,6 +517,7 @@ static void dlm_lockres_release(struct kref *kref)
mlog(0, "destroying lockres %.*s\n", res->lockname.len,
res->lockname.name);
spin_lock(&dlm->track_lock);
if (!list_empty(&res->tracking))
list_del_init(&res->tracking);
else {
@ -522,6 +525,9 @@ static void dlm_lockres_release(struct kref *kref)
res->lockname.len, res->lockname.name);
dlm_print_one_lock_resource(res);
}
spin_unlock(&dlm->track_lock);
dlm_put(dlm);
if (!hlist_unhashed(&res->hash_node) ||
!list_empty(&res->granted) ||
@ -595,6 +601,10 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
res->migration_pending = 0;
res->inflight_locks = 0;
/* put in dlm_lockres_release */
dlm_grab(dlm);
res->dlm = dlm;
kref_init(&res->refs);
/* just for consistency */
@ -722,14 +732,21 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
if (tmpres) {
int dropping_ref = 0;
spin_unlock(&dlm->spinlock);
spin_lock(&tmpres->spinlock);
/* We wait for the other thread that is mastering the resource */
if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
__dlm_wait_on_lockres(tmpres);
BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
}
if (tmpres->owner == dlm->node_num) {
BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
dlm_lockres_grab_inflight_ref(dlm, tmpres);
} else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
dropping_ref = 1;
spin_unlock(&tmpres->spinlock);
spin_unlock(&dlm->spinlock);
/* wait until done messaging the master, drop our ref to allow
* the lockres to be purged, start over. */
@ -2949,7 +2966,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
struct dlm_node_iter *iter)
{
struct dlm_migrate_request migrate;
int ret, status = 0;
int ret, skip, status = 0;
int nodenum;
memset(&migrate, 0, sizeof(migrate));
@ -2966,12 +2983,27 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
nodenum == new_master)
continue;
/* We could race exit domain. If exited, skip. */
spin_lock(&dlm->spinlock);
skip = (!test_bit(nodenum, dlm->domain_map));
spin_unlock(&dlm->spinlock);
if (skip) {
clear_bit(nodenum, iter->node_map);
continue;
}
ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key,
&migrate, sizeof(migrate), nodenum,
&status);
if (ret < 0)
mlog_errno(ret);
else if (status < 0) {
if (ret < 0) {
mlog(0, "migrate_request returned %d!\n", ret);
if (!dlm_is_host_down(ret)) {
mlog(ML_ERROR, "unhandled error=%d!\n", ret);
BUG();
}
clear_bit(nodenum, iter->node_map);
ret = 0;
} else if (status < 0) {
mlog(0, "migrate request (node %u) returned %d!\n",
nodenum, status);
ret = status;

View file

@ -181,7 +181,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
/* This ensures that clear refmap is sent after the set */
__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
__dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
DLM_LOCK_RES_MIGRATING));
spin_unlock(&res->spinlock);
/* clear our bit from the master's refmap, ignore errors */

View file

@ -32,6 +32,7 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>
@ -51,6 +52,7 @@
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
#include "quota.h"
#include "buffer_head_io.h"
@ -68,6 +70,7 @@ struct ocfs2_mask_waiter {
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
/*
* Return value from ->downconvert_worker functions.
@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
@ -111,8 +115,7 @@ static void ocfs2_dump_meta_lvb_info(u64 level,
unsigned int line,
struct ocfs2_lock_res *lockres)
{
struct ocfs2_meta_lvb *lvb =
(struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
mlog(level, "LVB information for %s (called from %s:%u):\n",
lockres->l_name, function, line);
@ -258,6 +261,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
.set_lvb = ocfs2_set_qinfo_lvb,
.get_osb = ocfs2_get_qinfo_osb,
.flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@ -279,6 +288,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res
return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
{
BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
}
static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
if (lockres->l_ops->get_osb)
@ -507,6 +523,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
return OCFS2_SB(inode->i_sb);
}
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
{
struct ocfs2_mem_dqinfo *info = lockres->l_priv;
return OCFS2_SB(info->dqi_gi.dqi_sb);
}
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
{
struct ocfs2_file_private *fp = lockres->l_priv;
@ -609,6 +632,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
lockres->l_flags |= OCFS2_LOCK_NOCACHE;
}
void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_mem_dqinfo *info)
{
ocfs2_lock_res_init_once(lockres);
ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
0, lockres->l_name);
ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
info);
}
void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
{
mlog_entry_void();
@ -1829,7 +1863,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
mlog_entry_void();
lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
/*
* Invalidate the LVB of a deleted inode - this way other
@ -1881,7 +1915,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
mlog_meta_lvb(0, lockres);
lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
/* We're safe here without the lockres lock... */
spin_lock(&oi->ip_lock);
@ -1916,8 +1950,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
struct ocfs2_lock_res *lockres)
{
struct ocfs2_meta_lvb *lvb =
(struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
if (lvb->lvb_version == OCFS2_LVB_VERSION
&& be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
@ -2024,7 +2057,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
} else {
/* Boo, we have to go to disk. */
/* read bh, cast, ocfs2_refresh_inode */
status = ocfs2_read_block(inode, oi->ip_blkno, bh);
status = ocfs2_read_inode_block(inode, bh);
if (status < 0) {
mlog_errno(status);
goto bail_refresh;
@ -2032,18 +2065,14 @@ static int ocfs2_inode_lock_update(struct inode *inode,
fe = (struct ocfs2_dinode *) (*bh)->b_data;
/* This is a good chance to make sure we're not
* locking an invalid object.
* locking an invalid object. ocfs2_read_inode_block()
* already checked that the inode block is sane.
*
* We bug on a stale inode here because we checked
* above whether it was wiped from disk. The wiping
* node provides a guarantee that we receive that
* message and can mark the inode before dropping any
* locks associated with it. */
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
status = -EIO;
goto bail_refresh;
}
mlog_bug_on_msg(inode->i_generation !=
le32_to_cpu(fe->i_generation),
"Invalid dinode %llu disk generation: %u "
@ -2085,7 +2114,7 @@ static int ocfs2_assign_bh(struct inode *inode,
return 0;
}
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
status = ocfs2_read_inode_block(inode, ret_bh);
if (status < 0)
mlog_errno(status);
@ -3449,6 +3478,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
return UNBLOCK_CONTINUE_POST;
}
static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
{
struct ocfs2_qinfo_lvb *lvb;
struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
oinfo->dqi_gi.dqi_type);
mlog_entry_void();
lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
mlog_exit_void();
}
void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
mlog_entry_void();
if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres, level);
mlog_exit_void();
}
static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
{
struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
oinfo->dqi_gi.dqi_type);
struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
struct buffer_head *bh = NULL;
struct ocfs2_global_disk_dqinfo *gdinfo;
int status = 0;
if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
oinfo->dqi_gi.dqi_free_entry =
be32_to_cpu(lvb->lvb_free_entry);
} else {
status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
if (status) {
mlog_errno(status);
goto bail;
}
gdinfo = (struct ocfs2_global_disk_dqinfo *)
(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
oinfo->dqi_gi.dqi_free_entry =
le32_to_cpu(gdinfo->dqi_free_entry);
brelse(bh);
ocfs2_track_lock_refresh(lockres);
}
bail:
return status;
}
/* Lock quota info, this function expects at least shared lock on the quota file
* so that we can safely refresh quota info from disk. */
int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
int status = 0;
mlog_entry_void();
/* On RO devices, locking really isn't needed... */
if (ocfs2_is_hard_readonly(osb)) {
if (ex)
status = -EROFS;
goto bail;
}
if (ocfs2_mount_local(osb))
goto bail;
status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
if (status < 0) {
mlog_errno(status);
goto bail;
}
if (!ocfs2_should_refresh_lock_res(lockres))
goto bail;
/* OK, we have the lock but we need to refresh the quota info */
status = ocfs2_refresh_qinfo(oinfo);
if (status)
ocfs2_qinfo_unlock(oinfo, ex);
ocfs2_complete_lock_res_refresh(lockres, status);
bail:
mlog_exit(status);
return status;
}
/*
* This is the filesystem locking protocol. It provides the lock handling
* hooks for the underlying DLM. It has a maximum version number.

View file

@ -49,6 +49,19 @@ struct ocfs2_meta_lvb {
__be32 lvb_reserved2;
};
#define OCFS2_QINFO_LVB_VERSION 1
struct ocfs2_qinfo_lvb {
__u8 lvb_version;
__u8 lvb_reserved[3];
__be32 lvb_bgrace;
__be32 lvb_igrace;
__be32 lvb_syncms;
__be32 lvb_blocks;
__be32 lvb_free_blk;
__be32 lvb_free_entry;
};
/* ocfs2_inode_lock_full() 'arg_flags' flags */
/* don't wait on recovery. */
#define OCFS2_META_LOCK_RECOVERY (0x01)
@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
struct ocfs2_file_private;
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp);
struct ocfs2_mem_dqinfo;
void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_mem_dqinfo *info);
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
int ocfs2_create_new_inode_locks(struct inode *inode);
int ocfs2_drop_inode_locks(struct inode *inode);
@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex);
void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
int ocfs2_file_lock(struct file *file, int ex, int trylock);
void ocfs2_file_unlock(struct file *file);
int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,

View file

@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
ret = -EROFS;
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
goto out;
}
if (el->l_tree_depth) {
ocfs2_error(inode->i_sb,
"Inode %lu has non zero tree depth in "
@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
goto no_more_extents;
ret = ocfs2_read_block(inode,
le64_to_cpu(eb->h_next_leaf_blk),
&next_eb_bh);
ret = ocfs2_read_extent_block(inode,
le64_to_cpu(eb->h_next_leaf_blk),
&next_eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
ret = -EROFS;
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb);
goto out;
}
el = &next_eb->h_list;
i = ocfs2_search_for_hole_index(el, v_cluster);
}
@ -630,7 +617,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
if (ret == 0)
goto out;
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@ -819,3 +806,74 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return ret;
}
int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int rc = 0;
u64 p_block, p_count;
int i, count, done = 0;
mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
"flags = %x, validate = %p)\n",
inode, (unsigned long long)v_block, nr, bhs, flags,
validate);
if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
i_size_read(inode)) {
BUG_ON(!(flags & OCFS2_BH_READAHEAD));
goto out;
}
while (done < nr) {
down_read(&OCFS2_I(inode)->ip_alloc_sem);
rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
&p_block, &p_count, NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (rc) {
mlog_errno(rc);
break;
}
if (!p_block) {
rc = -EIO;
mlog(ML_ERROR,
"Inode #%llu contains a hole at offset %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)(v_block + done) <<
inode->i_sb->s_blocksize_bits);
break;
}
count = nr - done;
if (p_count < count)
count = p_count;
/*
* If the caller passed us bhs, they should have come
* from a previous readahead call to this function. Thus,
* they should have the right b_blocknr.
*/
for (i = 0; i < count; i++) {
if (!bhs[done + i])
continue;
BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
}
rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
flags, validate);
if (rc) {
mlog_errno(rc);
break;
}
done += count;
}
out:
mlog_exit(rc);
return rc;
}

View file

@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
struct ocfs2_extent_list *el);
int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh));
static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
struct buffer_head **bh,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
if (bh == NULL) {
printk("ocfs2: bh == NULL\n");
status = -EINVAL;
goto bail;
}
status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate);
bail:
return status;
}
#endif /* _EXTENT_MAP_H */

View file

@ -35,6 +35,7 @@
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/falloc.h>
#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
@ -56,6 +57,8 @@
#include "suballoc.h"
#include "super.h"
#include "xattr.h"
#include "acl.h"
#include "quota.h"
#include "buffer_head_io.h"
@ -253,8 +256,8 @@ int ocfs2_update_inode_atime(struct inode *inode,
goto out;
}
ret = ocfs2_journal_access(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
@ -303,9 +306,9 @@ static int ocfs2_set_inode_size(handle_t *handle,
return status;
}
static int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size)
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size)
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@ -350,8 +353,8 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
goto out;
}
status = ocfs2_journal_access(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto out_commit;
@ -401,12 +404,9 @@ static int ocfs2_truncate_file(struct inode *inode,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)new_i_size);
/* We trust di_bh because it comes from ocfs2_inode_lock(), which
* already validated it */
fe = (struct ocfs2_dinode *) di_bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
status = -EIO;
goto bail;
}
mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
"Inode %llu, inode i_size = %lld != di "
@ -536,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
enum ocfs2_alloc_restarted why;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_extent_tree et;
int did_quota = 0;
mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
@ -545,18 +546,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
*/
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
status = -EIO;
goto leave;
}
restart_all:
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
@ -585,11 +580,18 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
}
restarted_transaction:
if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
clusters_to_add))) {
status = -EDQUOT;
goto leave;
}
did_quota = 1;
/* reserve a write to the file entry early on - that we if we
* run out of credits in the allocation path, we can still
* update i_size. */
status = ocfs2_journal_access(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -622,6 +624,10 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
spin_lock(&OCFS2_I(inode)->ip_lock);
clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
spin_unlock(&OCFS2_I(inode)->ip_lock);
/* Release unused quota reservation */
vfs_dq_free_space(inode,
ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
did_quota = 0;
if (why != RESTART_NONE && clusters_to_add) {
if (why == RESTART_META) {
@ -654,6 +660,9 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
leave:
if (status < 0 && did_quota)
vfs_dq_free_space(inode,
ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
if (handle) {
ocfs2_commit_trans(osb, handle);
handle = NULL;
@ -885,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
struct ocfs2_super *osb = OCFS2_SB(sb);
struct buffer_head *bh = NULL;
handle_t *handle = NULL;
int locked[MAXQUOTAS] = {0, 0};
int credits, qtype;
struct ocfs2_mem_dqinfo *oinfo;
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
@ -955,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
}
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
goto bail_unlock;
if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
(attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
credits = OCFS2_INODE_UPDATE_CREDITS;
if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv;
status = ocfs2_lock_global_qf(oinfo, 1);
if (status < 0)
goto bail_unlock;
credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) +
ocfs2_calc_qdel_credits(sb, USRQUOTA);
locked[USRQUOTA] = 1;
}
if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv;
status = ocfs2_lock_global_qf(oinfo, 1);
if (status < 0)
goto bail_unlock;
credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) +
ocfs2_calc_qdel_credits(sb, GRPQUOTA);
locked[GRPQUOTA] = 1;
}
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
goto bail_unlock;
}
status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
if (status < 0)
goto bail_commit;
} else {
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
goto bail_unlock;
}
}
/*
@ -982,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
if (!locked[qtype])
continue;
oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
ocfs2_unlock_global_qf(oinfo, 1);
}
ocfs2_inode_unlock(inode, 1);
bail_unlock_rw:
if (size_change)
@ -989,6 +1043,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
bail:
brelse(bh);
if (!status && attr->ia_valid & ATTR_MODE) {
status = ocfs2_acl_chmod(inode);
if (status < 0)
mlog_errno(status);
}
mlog_exit(status);
return status;
}
@ -1035,7 +1095,7 @@ int ocfs2_permission(struct inode *inode, int mask)
goto out;
}
ret = generic_permission(inode, mask, NULL);
ret = generic_permission(inode, mask, ocfs2_check_acl);
ocfs2_inode_unlock(inode, 0);
out:
@ -1061,8 +1121,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
goto out;
}
ret = ocfs2_journal_access(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
goto out_trans;
@ -1128,9 +1188,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
{
int ret;
struct buffer_head *bh = NULL;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
ret = ocfs2_read_block(inode, oi->ip_blkno, &bh);
ret = ocfs2_read_inode_block(inode, &bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
@ -1156,8 +1215,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
struct buffer_head *di_bh = NULL;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno,
&di_bh);
ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@ -1226,83 +1284,6 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
return ret;
}
static int __ocfs2_remove_inode_range(struct inode *inode,
struct buffer_head *di_bh,
u32 cpos, u32 phys_cpos, u32 len,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
int ret;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *tl_inode = osb->osb_tl_inode;
handle_t *handle;
struct ocfs2_alloc_context *meta_ac = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_extent_tree et;
ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
if (ret) {
mlog_errno(ret);
return ret;
}
mutex_lock(&tl_inode->i_mutex);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
}
handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out;
}
ret = ocfs2_journal_access(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
dealloc);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
OCFS2_I(inode)->ip_clusters -= len;
di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
ret = ocfs2_journal_dirty(handle, di_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
if (ret)
mlog_errno(ret);
out_commit:
ocfs2_commit_trans(osb, handle);
out:
mutex_unlock(&tl_inode->i_mutex);
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
return ret;
}
/*
* Truncate a byte range, avoiding pages within partial clusters. This
* preserves those pages for the zeroing code to write to.
@ -1402,7 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_cached_dealloc_ctxt dealloc;
struct address_space *mapping = inode->i_mapping;
struct ocfs2_extent_tree et;
ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
ocfs2_init_dealloc_ctxt(&dealloc);
if (byte_len == 0)
@ -1458,9 +1441,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
/* Only do work for non-holes */
if (phys_cpos != 0) {
ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
phys_cpos, alloc_size,
&dealloc);
ret = ocfs2_remove_btree_range(inode, &et, cpos,
phys_cpos, alloc_size,
&dealloc);
if (ret) {
mlog_errno(ret);
goto out;

View file

@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);

View file

@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <asm/byteorder.h>
@ -37,6 +38,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
@ -214,12 +216,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
return 0;
}
int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
int create_ino)
void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
int create_ino)
{
struct super_block *sb;
struct ocfs2_super *osb;
int status = -EINVAL;
int use_plocks = 1;
mlog_entry("(0x%p, size:%llu)\n", inode,
@ -232,25 +233,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
use_plocks = 0;
/* this means that read_inode cannot create a superblock inode
* today. change if needed. */
if (!OCFS2_IS_VALID_DINODE(fe) ||
!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
"signature = %.*s, flags = 0x%x\n",
inode->i_ino,
(unsigned long long)le64_to_cpu(fe->i_blkno), 7,
fe->i_signature, le32_to_cpu(fe->i_flags));
goto bail;
}
/*
* These have all been checked by ocfs2_read_inode_block() or set
* by ocfs2_mknod_locked(), so a failure is a code bug.
*/
BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode
cannot create a superblock
inode today. change if
that is needed. */
BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)));
BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation);
if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) {
mlog(ML_ERROR, "file entry generation does not match "
"superblock! osb->fs_generation=%x, "
"fe->i_fs_generation=%x\n",
osb->fs_generation, le32_to_cpu(fe->i_fs_generation));
goto bail;
}
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
@ -284,14 +277,18 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
inode->i_nlink = le16_to_cpu(fe->i_links_count);
if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
inode->i_flags |= S_NOQUOTA;
}
if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
} else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
} else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
inode->i_flags |= S_NOQUOTA;
} else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
/* we can't actually hit this as read_inode can't
@ -354,10 +351,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_set_inode_flags(inode);
status = 0;
bail:
mlog_exit(status);
return status;
mlog_exit_void();
}
static int ocfs2_read_locked_inode(struct inode *inode,
@ -460,11 +454,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
}
}
if (can_lock)
status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
else
if (can_lock) {
status = ocfs2_read_inode_block_full(inode, &bh,
OCFS2_BH_IGNORE_CACHE);
} else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
if (!status)
status = ocfs2_validate_inode_block(osb->sb, bh);
}
if (status < 0) {
mlog_errno(status);
goto bail;
@ -472,12 +469,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
status = -EINVAL;
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
(unsigned long long)args->fi_blkno, 7,
fe->i_signature);
goto bail;
}
/*
* This is a code bug. Right now the caller needs to
@ -491,10 +482,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
S_ISBLK(le16_to_cpu(fe->i_mode)))
inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
if (ocfs2_populate_inode(inode, fe, 0) < 0)
goto bail;
ocfs2_populate_inode(inode, fe, 0);
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
@ -547,8 +537,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
goto out;
}
status = ocfs2_journal_access(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto out;
@ -615,7 +605,8 @@ static int ocfs2_remove_inode(struct inode *inode,
goto bail;
}
handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS);
handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
ocfs2_quota_trans_credits(inode->i_sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@ -630,8 +621,8 @@ static int ocfs2_remove_inode(struct inode *inode,
}
/* set the inodes dtime */
status = ocfs2_journal_access(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail_commit;
@ -647,6 +638,7 @@ static int ocfs2_remove_inode(struct inode *inode,
}
ocfs2_remove_from_cache(inode, di_bh);
vfs_dq_free_inode(inode);
status = ocfs2_free_dinode(handle, inode_alloc_inode,
inode_alloc_bh, di);
@ -929,7 +921,10 @@ void ocfs2_delete_inode(struct inode *inode)
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
if (is_bad_inode(inode)) {
/* When we fail in read_inode() we mark inode as bad. The second test
* catches the case when inode allocation fails before allocating
* a block for inode. */
if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) {
mlog(0, "Skipping delete of bad inode\n");
goto bail;
}
@ -1195,8 +1190,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
mlog_entry("(inode %llu)\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
status = ocfs2_journal_access(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -1264,3 +1259,89 @@ void ocfs2_refresh_inode(struct inode *inode,
spin_unlock(&OCFS2_I(inode)->ip_lock);
}
int ocfs2_validate_inode_block(struct super_block *sb,
struct buffer_head *bh)
{
int rc;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
mlog(0, "Validating dinode %llu\n",
(unsigned long long)bh->b_blocknr);
BUG_ON(!buffer_uptodate(bh));
/*
* If the ecc fails, we return the error but otherwise
* leave the filesystem running. We know any error is
* local to this block.
*/
rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
if (rc) {
mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
(unsigned long long)bh->b_blocknr);
goto bail;
}
/*
* Errors after here are fatal.
*/
rc = -EINVAL;
if (!OCFS2_IS_VALID_DINODE(di)) {
ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
(unsigned long long)bh->b_blocknr, 7,
di->i_signature);
goto bail;
}
if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
(unsigned long long)bh->b_blocknr,
(unsigned long long)le64_to_cpu(di->i_blkno));
goto bail;
}
if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
ocfs2_error(sb,
"Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
(unsigned long long)bh->b_blocknr);
goto bail;
}
if (le32_to_cpu(di->i_fs_generation) !=
OCFS2_SB(sb)->fs_generation) {
ocfs2_error(sb,
"Invalid dinode #%llu: fs_generation is %u\n",
(unsigned long long)bh->b_blocknr,
le32_to_cpu(di->i_fs_generation));
goto bail;
}
rc = 0;
bail:
return rc;
}
int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
int flags)
{
int rc;
struct buffer_head *tmp = *bh;
rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
flags, ocfs2_validate_inode_block);
/* If ocfs2_read_blocks() got us a new bh, pass it up. */
if (!rc && !*bh)
*bh = tmp;
return rc;
}
int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
{
return ocfs2_read_inode_block_full(inode, bh, 0);
}

View file

@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
int ocfs2_inode_init_private(struct inode *inode);
int ocfs2_inode_revalidate(struct dentry *dentry);
int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
int create_ino);
void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
int create_ino);
void ocfs2_read_inode(struct inode *inode);
void ocfs2_read_inode2(struct inode *inode, void *opaque);
ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
struct buffer_head *bh);
int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
struct buffer_head *ocfs2_bread(struct inode *inode,
int block, int *err, int reada);
void ocfs2_set_inode_flags(struct inode *inode);
void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
@ -153,4 +155,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
}
/* Validate that a bh contains a valid inode */
int ocfs2_validate_inode_block(struct super_block *sb,
struct buffer_head *bh);
/*
* Read an inode block into *bh. If *bh is NULL, a bh will be allocated.
* This is a cached read. The inode will be validated with
* ocfs2_validate_inode_block().
*/
int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
/* The same, but can be passed OCFS2_BH_* flags */
int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
int flags);
#endif /* OCFS2_INODE_H */

View file

@ -35,6 +35,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "blockcheck.h"
#include "dir.h"
#include "dlmglue.h"
#include "extent_map.h"
@ -45,6 +46,7 @@
#include "slot_map.h"
#include "super.h"
#include "sysfile.h"
#include "quota.h"
#include "buffer_head_io.h"
@ -52,10 +54,10 @@ DEFINE_SPINLOCK(trans_inc_lock);
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb,
int node_num);
int node_num, int slot_num);
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
@ -64,6 +66,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
int slot);
static int ocfs2_commit_thread(void *arg);
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
return __ocfs2_wait_on_mount(osb, 0);
}
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
return __ocfs2_wait_on_mount(osb, 1);
}
/*
* The recovery_list is a simple linked list of node numbers to recover.
@ -256,11 +269,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
BUG_ON(max_buffs <= 0);
/* JBD might support this, but our journalling code doesn't yet. */
if (journal_current_handle()) {
mlog(ML_ERROR, "Recursive transaction attempted!\n");
BUG();
}
/* Nested transaction? Just return the handle... */
if (journal_current_handle())
return jbd2_journal_start(journal, max_buffs);
down_read(&osb->journal->j_trans_barrier);
@ -285,16 +296,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
int ocfs2_commit_trans(struct ocfs2_super *osb,
handle_t *handle)
{
int ret;
int ret, nested;
struct ocfs2_journal *journal = osb->journal;
BUG_ON(!handle);
nested = handle->h_ref > 1;
ret = jbd2_journal_stop(handle);
if (ret < 0)
mlog_errno(ret);
up_read(&journal->j_trans_barrier);
if (!nested)
up_read(&journal->j_trans_barrier);
return ret;
}
@ -357,10 +370,137 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
return status;
}
int ocfs2_journal_access(handle_t *handle,
struct inode *inode,
struct buffer_head *bh,
int type)
struct ocfs2_triggers {
struct jbd2_buffer_trigger_type ot_triggers;
int ot_offset;
};
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
/*
* We aren't guaranteed to have the superblock here, so we
* must unconditionally compute the ecc data.
* __ocfs2_journal_access() will only set the triggers if
* metaecc is enabled.
*/
ocfs2_block_check_compute(data, size, data + ot->ot_offset);
}
/*
* Quota blocks have their own trigger because the struct ocfs2_block_check
* offset depends on the blocksize.
*/
static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
struct ocfs2_disk_dqtrailer *dqt =
ocfs2_block_dqtrailer(size, data);
/*
* We aren't guaranteed to have the superblock here, so we
* must unconditionally compute the ecc data.
* __ocfs2_journal_access() will only set the triggers if
* metaecc is enabled.
*/
ocfs2_block_check_compute(data, size, &dqt->dq_check);
}
/*
* Directory blocks also have their own trigger because the
* struct ocfs2_block_check offset depends on the blocksize.
*/
static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
struct ocfs2_dir_block_trailer *trailer =
ocfs2_dir_trailer_from_size(size, data);
/*
* We aren't guaranteed to have the superblock here, so we
* must unconditionally compute the ecc data.
* __ocfs2_journal_access() will only set the triggers if
* metaecc is enabled.
*/
ocfs2_block_check_compute(data, size, &trailer->db_check);
}
static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh)
{
mlog(ML_ERROR,
"ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
"bh->b_blocknr = %llu\n",
(unsigned long)bh,
(unsigned long long)bh->b_blocknr);
/* We aren't guaranteed to have the superblock here - but if we
* don't, it'll just crash. */
ocfs2_error(bh->b_assoc_map->host->i_sb,
"JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
static struct ocfs2_triggers di_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dinode, i_check),
};
static struct ocfs2_triggers eb_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_extent_block, h_check),
};
static struct ocfs2_triggers gd_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
};
static struct ocfs2_triggers db_triggers = {
.ot_triggers = {
.t_commit = ocfs2_db_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers xb_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
};
static struct ocfs2_triggers dq_triggers = {
.ot_triggers = {
.t_commit = ocfs2_dq_commit_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static int __ocfs2_journal_access(handle_t *handle,
struct inode *inode,
struct buffer_head *bh,
struct ocfs2_triggers *triggers,
int type)
{
int status;
@ -406,6 +546,8 @@ int ocfs2_journal_access(handle_t *handle,
status = -EINVAL;
mlog(ML_ERROR, "Uknown access type!\n");
}
if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
if (status < 0)
@ -416,6 +558,54 @@ int ocfs2_journal_access(handle_t *handle,
return status;
}
int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
type);
}
int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
type);
}
int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
type);
}
int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
type);
}
int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
type);
}
int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
type);
}
int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type)
{
return __ocfs2_journal_access(handle, inode, bh, NULL, type);
}
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh)
{
@ -434,20 +624,6 @@ int ocfs2_journal_dirty(handle_t *handle,
return status;
}
#ifdef CONFIG_OCFS2_COMPAT_JBD
int ocfs2_journal_dirty_data(handle_t *handle,
struct buffer_head *bh)
{
int err = journal_dirty_data(handle, bh);
if (err)
mlog_errno(err);
/* TODO: When we can handle it, abort the handle and go RO on
* error here. */
return err;
}
#endif
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
void ocfs2_set_journal_params(struct ocfs2_super *osb)
@ -587,17 +763,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
mlog_entry_void();
fe = (struct ocfs2_dinode *)bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
/* This is called from startup/shutdown which will
* handle the errors in a specific manner, so no need
* to call ocfs2_error() here. */
mlog(ML_ERROR, "Journal dinode %llu has invalid "
"signature: %.*s",
(unsigned long long)le64_to_cpu(fe->i_blkno), 7,
fe->i_signature);
status = -EIO;
goto out;
}
/* The journal bh on the osb always comes from ocfs2_journal_init()
* and was validated there inside ocfs2_inode_lock_full(). It's a
* code bug if we mess it up. */
BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
flags = le32_to_cpu(fe->id1.journal1.ij_flags);
if (dirty)
@ -609,11 +779,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
if (replayed)
ocfs2_bump_recovery_generation(fe);
ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
status = ocfs2_write_block(osb, bh, journal->j_inode);
if (status < 0)
mlog_errno(status);
out:
mlog_exit(status);
return status;
}
@ -878,6 +1048,7 @@ struct ocfs2_la_recovery_item {
int lri_slot;
struct ocfs2_dinode *lri_la_dinode;
struct ocfs2_dinode *lri_tl_dinode;
struct ocfs2_quota_recovery *lri_qrec;
};
/* Does the second half of the recovery process. By this point, the
@ -898,6 +1069,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
struct ocfs2_super *osb = journal->j_osb;
struct ocfs2_dinode *la_dinode, *tl_dinode;
struct ocfs2_la_recovery_item *item, *n;
struct ocfs2_quota_recovery *qrec;
LIST_HEAD(tmp_la_list);
mlog_entry_void();
@ -913,6 +1085,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
ocfs2_wait_on_quotas(osb);
la_dinode = item->lri_la_dinode;
if (la_dinode) {
mlog(0, "Clean up local alloc %llu\n",
@ -943,6 +1117,16 @@ void ocfs2_complete_recovery(struct work_struct *work)
if (ret < 0)
mlog_errno(ret);
qrec = item->lri_qrec;
if (qrec) {
mlog(0, "Recovering quota files");
ret = ocfs2_finish_quota_recovery(osb, qrec,
item->lri_slot);
if (ret < 0)
mlog_errno(ret);
/* Recovery info is already freed now */
}
kfree(item);
}
@ -956,7 +1140,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode)
struct ocfs2_dinode *tl_dinode,
struct ocfs2_quota_recovery *qrec)
{
struct ocfs2_la_recovery_item *item;
@ -971,6 +1156,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
if (tl_dinode)
kfree(tl_dinode);
if (qrec)
ocfs2_free_quota_recovery(qrec);
mlog_errno(-ENOMEM);
return;
}
@ -979,6 +1167,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
item->lri_la_dinode = la_dinode;
item->lri_slot = slot_num;
item->lri_tl_dinode = tl_dinode;
item->lri_qrec = qrec;
spin_lock(&journal->j_lock);
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@ -998,6 +1187,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
ocfs2_queue_recovery_completion(journal,
osb->slot_num,
osb->local_alloc_copy,
NULL,
NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
@ -1006,11 +1196,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
}
}
void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
{
if (osb->quota_rec) {
ocfs2_queue_recovery_completion(osb->journal,
osb->slot_num,
NULL,
NULL,
osb->quota_rec);
osb->quota_rec = NULL;
}
}
static int __ocfs2_recovery_thread(void *arg)
{
int status, node_num;
int status, node_num, slot_num;
struct ocfs2_super *osb = arg;
struct ocfs2_recovery_map *rm = osb->recovery_map;
int *rm_quota = NULL;
int rm_quota_used = 0, i;
struct ocfs2_quota_recovery *qrec;
mlog_entry_void();
@ -1019,6 +1224,11 @@ static int __ocfs2_recovery_thread(void *arg)
goto bail;
}
rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
if (!rm_quota) {
status = -ENOMEM;
goto bail;
}
restart:
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
@ -1032,8 +1242,28 @@ static int __ocfs2_recovery_thread(void *arg)
* clear it until ocfs2_recover_node() has succeeded. */
node_num = rm->rm_entries[0];
spin_unlock(&osb->osb_lock);
mlog(0, "checking node %d\n", node_num);
slot_num = ocfs2_node_num_to_slot(osb, node_num);
if (slot_num == -ENOENT) {
status = 0;
mlog(0, "no slot for this node, so no recovery"
"required.\n");
goto skip_recovery;
}
mlog(0, "node %d was using slot %d\n", node_num, slot_num);
status = ocfs2_recover_node(osb, node_num);
/* It is a bit subtle with quota recovery. We cannot do it
* immediately because we have to obtain cluster locks from
* quota files and we also don't want to just skip it because
* then quota usage would be out of sync until some node takes
* the slot. So we remember which nodes need quota recovery
* and when everything else is done, we recover quotas. */
for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
if (i == rm_quota_used)
rm_quota[rm_quota_used++] = slot_num;
status = ocfs2_recover_node(osb, node_num, slot_num);
skip_recovery:
if (!status) {
ocfs2_recovery_map_clear(osb, node_num);
} else {
@ -1055,13 +1285,27 @@ static int __ocfs2_recovery_thread(void *arg)
if (status < 0)
mlog_errno(status);
/* Now it is right time to recover quotas... We have to do this under
* superblock lock so that noone can start using the slot (and crash)
* before we recover it */
for (i = 0; i < rm_quota_used; i++) {
qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
if (IS_ERR(qrec)) {
status = PTR_ERR(qrec);
mlog_errno(status);
continue;
}
ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
NULL, NULL, qrec);
}
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
* node(s) may have disallowd a previos inode delete. Re-processing
* is therefore required. */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL);
NULL, NULL);
bail:
mutex_lock(&osb->recovery_lock);
@ -1076,6 +1320,9 @@ static int __ocfs2_recovery_thread(void *arg)
mutex_unlock(&osb->recovery_lock);
if (rm_quota)
kfree(rm_quota);
mlog_exit(status);
/* no one is callint kthread_stop() for us so the kthread() api
* requires that we call do_exit(). And it isn't exported, but
@ -1135,8 +1382,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
}
SET_INODE_JOURNAL(inode);
status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
OCFS2_BH_IGNORE_CACHE);
status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -1268,6 +1514,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
osb->slot_recovery_generations[slot_num] =
ocfs2_get_recovery_generation(fe);
ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
status = ocfs2_write_block(osb, bh, inode);
if (status < 0)
mlog_errno(status);
@ -1304,31 +1551,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
* far less concerning.
*/
static int ocfs2_recover_node(struct ocfs2_super *osb,
int node_num)
int node_num, int slot_num)
{
int status = 0;
int slot_num;
struct ocfs2_dinode *la_copy = NULL;
struct ocfs2_dinode *tl_copy = NULL;
mlog_entry("(node_num=%d, osb->node_num = %d)\n",
node_num, osb->node_num);
mlog(0, "checking node %d\n", node_num);
mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
node_num, slot_num, osb->node_num);
/* Should not ever be called to recover ourselves -- in that
* case we should've called ocfs2_journal_load instead. */
BUG_ON(osb->node_num == node_num);
slot_num = ocfs2_node_num_to_slot(osb, node_num);
if (slot_num == -ENOENT) {
status = 0;
mlog(0, "no slot for this node, so no recovery required.\n");
goto done;
}
mlog(0, "node %d was using slot %d\n", node_num, slot_num);
status = ocfs2_replay_journal(osb, node_num, slot_num);
if (status < 0) {
if (status == -EBUSY) {
@ -1364,7 +1599,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
/* This will kfree the memory pointed to by la_copy and tl_copy */
ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
tl_copy);
tl_copy, NULL);
status = 0;
done:
@ -1659,13 +1894,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
return ret;
}
static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
{
/* This check is good because ocfs2 will wait on our recovery
* thread before changing it to something other than MOUNTED
* or DISABLED. */
wait_event(osb->osb_mount_event,
atomic_read(&osb->vol_state) == VOLUME_MOUNTED ||
(!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
atomic_read(&osb->vol_state) == VOLUME_DISABLED);
/* If there's an error on mount, then we may never get to the

View file

@ -27,12 +27,7 @@
#define OCFS2_JOURNAL_H
#include <linux/fs.h>
#ifndef CONFIG_OCFS2_COMPAT_JBD
# include <linux/jbd2.h>
#else
# include <linux/jbd.h>
# include "ocfs2_jbd_compat.h"
#endif
#include <linux/jbd2.h>
enum ocfs2_journal_state {
OCFS2_JOURNAL_FREE = 0,
@ -173,6 +168,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb,
int node_num);
int ocfs2_mark_dead_nodes(struct ocfs2_super *osb);
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb);
void ocfs2_complete_quota_recovery(struct ocfs2_super *osb);
static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
{
@ -216,9 +212,12 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
* ocfs2_extend_trans - Extend a handle by nblocks credits. This may
* commit the handle to disk in the process, but will
* not release any locks taken during the transaction.
* ocfs2_journal_access - Notify the handle that we want to journal this
* ocfs2_journal_access* - Notify the handle that we want to journal this
* buffer. Will have to call ocfs2_journal_dirty once
* we've actually dirtied it. Type is one of . or .
* Always call the specific flavor of
* ocfs2_journal_access_*() unless you intend to
* manage the checksum by hand.
* ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
* ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
* the current handle commits.
@ -248,10 +247,29 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks);
#define OCFS2_JOURNAL_ACCESS_WRITE 1
#define OCFS2_JOURNAL_ACCESS_UNDO 2
int ocfs2_journal_access(handle_t *handle,
struct inode *inode,
struct buffer_head *bh,
int type);
/* ocfs2_inode */
int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_extent_block */
int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_group_desc */
int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* ocfs2_xattr_block */
int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* quota blocks */
int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* dirblock */
int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/* Anything that has no ecc */
int ocfs2_journal_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
/*
* A word about the journal_access/journal_dirty "dance". It is
* entirely legal to journal_access a buffer more than once (as long
@ -273,10 +291,6 @@ int ocfs2_journal_access(handle_t *handle,
*/
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh);
#ifdef CONFIG_OCFS2_COMPAT_JBD
int ocfs2_journal_dirty_data(handle_t *handle,
struct buffer_head *bh);
#endif
/*
* Credit Macros:
@ -293,6 +307,37 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* extended attribute block update */
#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
/* global quotafile inode update, data block */
#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
/*
* The two writes below can accidentally see global info dirty due
* to set_info() quotactl so make them prepared for the writes.
*/
/* quota data block, global info */
/* Write to local quota file */
#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
/* global quota data block, local quota data block, global quota inode,
* global quota info */
#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
static inline int ocfs2_quota_trans_credits(struct super_block *sb)
{
int credits = 0;
if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA))
credits += OCFS2_QWRITE_CREDITS;
if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA))
credits += OCFS2_QWRITE_CREDITS;
return credits;
}
/* Number of credits needed for removing quota structure from file */
int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
/* Number of credits needed for initialization of new quota structure */
int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
/* group extend. inode update and last group update. */
#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
@ -303,8 +348,11 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* prev. group desc. if we relink. */
#define OCFS2_SUBALLOC_ALLOC (3)
#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
+ OCFS2_INODE_UPDATE_CREDITS)
static inline int ocfs2_inline_to_extents_credits(struct super_block *sb)
{
return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS +
ocfs2_quota_trans_credits(sb);
}
/* dinode + group descriptor update. We don't relink on free yet. */
#define OCFS2_SUBALLOC_FREE (2)
@ -313,16 +361,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
+ OCFS2_TRUNCATE_LOG_UPDATE)
#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
static inline int ocfs2_remove_extent_credits(struct super_block *sb)
{
return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS +
ocfs2_quota_trans_credits(sb);
}
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
* group descriptor + mkdir/symlink blocks */
#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \
+ OCFS2_DIR_LINK_ADDITIONAL_CREDITS)
* group descriptor + mkdir/symlink blocks + quota update */
static inline int ocfs2_mknod_credits(struct super_block *sb)
{
return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
ocfs2_quota_trans_credits(sb);
}
/* local alloc metadata change + main bitmap updates */
#define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \
@ -332,13 +387,21 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* for the dinode, one for the new block. */
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1)
/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
* update on dir */
static inline int ocfs2_link_credits(struct super_block *sb)
{
return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
ocfs2_quota_trans_credits(sb);
}
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */
#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \
+ OCFS2_LINK_CREDITS)
static inline int ocfs2_unlink_credits(struct super_block *sb)
{
/* The quota update from ocfs2_link_credits is unused here... */
return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
}
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
* inode alloc group descriptor */
@ -347,8 +410,10 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink */
#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
+ OCFS2_UNLINK_CREDITS)
static inline int ocfs2_rename_credits(struct super_block *sb)
{
return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
}
/* global bitmap dinode, group desc., relinked group,
* suballocator dinode, group desc., relinked group,
@ -386,18 +451,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
* credit for the dinode there. */
extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
ocfs2_quota_trans_credits(sb);
}
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
{
int blocks = OCFS2_MKNOD_CREDITS;
int blocks = ocfs2_mknod_credits(sb);
/* links can be longer than one block so we may update many
* within our single allocated extent. */
blocks += ocfs2_clusters_to_blocks(sb, 1);
return blocks;
return blocks + ocfs2_quota_trans_credits(sb);
}
static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
@ -434,6 +500,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
/* update to the truncate log. */
credits += OCFS2_TRUNCATE_LOG_UPDATE;
credits += ocfs2_quota_trans_credits(sb);
return credits;
}

View file

@ -36,6 +36,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "inode.h"
#include "journal.h"
@ -248,8 +249,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
goto bail;
}
status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
&alloc_bh, OCFS2_BH_IGNORE_CACHE);
status = ocfs2_read_inode_block_full(inode, &alloc_bh,
OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -382,8 +383,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
}
memcpy(alloc_copy, alloc, bh->b_size);
status = ocfs2_journal_access(handle, local_alloc_inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto out_commit;
@ -459,8 +460,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
mutex_lock(&inode->i_mutex);
status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
&alloc_bh, OCFS2_BH_IGNORE_CACHE);
status = ocfs2_read_inode_block_full(inode, &alloc_bh,
OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -476,6 +477,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
ocfs2_clear_local_alloc(alloc);
ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
status = ocfs2_write_block(osb, alloc_bh, inode);
if (status < 0)
mlog_errno(status);
@ -762,9 +764,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
* delete bits from it! */
*num_bits = bits_wanted;
status = ocfs2_journal_access(handle, local_alloc_inode,
osb->local_alloc_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, local_alloc_inode,
osb->local_alloc_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -1240,9 +1242,9 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
}
memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
status = ocfs2_journal_access(handle, local_alloc_inode,
osb->local_alloc_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, local_alloc_inode,
osb->local_alloc_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;

View file

@ -40,6 +40,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@ -61,17 +62,18 @@
#include "sysfile.h"
#include "uptodate.h"
#include "xattr.h"
#include "acl.h"
#include "buffer_head_io.h"
static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir,
struct dentry *dentry, int mode,
struct inode *inode,
struct dentry *dentry,
dev_t dev,
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
handle_t *handle,
struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac);
static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
@ -186,6 +188,35 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
return ret;
}
static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
{
struct inode *inode;
inode = new_inode(dir->i_sb);
if (!inode) {
mlog(ML_ERROR, "new_inode failed!\n");
return NULL;
}
/* populate as many fields early on as possible - many of
* these are used by the support functions here and in
* callers. */
if (S_ISDIR(mode))
inode->i_nlink = 2;
else
inode->i_nlink = 1;
inode->i_uid = current_fsuid();
if (dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
inode->i_gid = current_fsgid();
inode->i_mode = mode;
vfs_dq_init(inode);
return inode;
}
static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry,
int mode,
@ -201,6 +232,13 @@ static int ocfs2_mknod(struct inode *dir,
struct inode *inode = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *xattr_ac = NULL;
int want_clusters = 0;
int xattr_credits = 0;
struct ocfs2_security_xattr_info si = {
.enable = 1,
};
int did_quota_inode = 0;
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len,
@ -250,17 +288,46 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
/* Reserve a cluster if creating an extent based directory. */
if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
status = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
inode = ocfs2_get_init_inode(dir, mode);
if (!inode) {
status = -ENOMEM;
mlog_errno(status);
goto leave;
}
/* get security xattr */
status = ocfs2_init_security_get(inode, dir, &si);
if (status) {
if (status == -EOPNOTSUPP)
si.enable = 0;
else {
mlog_errno(status);
goto leave;
}
}
handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
/* calculate meta data/clusters for setting security and acl xattr */
status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
&si, &want_clusters,
&xattr_credits, &xattr_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
}
/* Reserve a cluster if creating an extent based directory. */
if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
want_clusters += 1;
status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
goto leave;
}
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
xattr_credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@ -268,10 +335,19 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
/* We don't use standard VFS wrapper because we don't want vfs_dq_init
* to be called. */
if (sb_any_quota_active(osb->sb) &&
osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
status = -EDQUOT;
goto leave;
}
did_quota_inode = 1;
/* do the real work now. */
status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
&new_fe_bh, parent_fe_bh, handle,
&inode, inode_ac);
inode_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -285,8 +361,8 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
status = ocfs2_journal_access(handle, dir, parent_fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, dir, parent_fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -300,6 +376,22 @@ static int ocfs2_mknod(struct inode *dir,
inc_nlink(dir);
}
status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
xattr_ac, data_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
}
if (si.enable) {
status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
xattr_ac, data_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
}
}
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
de_bh);
@ -320,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir,
d_instantiate(dentry, inode);
status = 0;
leave:
if (status < 0 && did_quota_inode)
vfs_dq_free_inode(inode);
if (handle)
ocfs2_commit_trans(osb, handle);
@ -331,9 +425,13 @@ static int ocfs2_mknod(struct inode *dir,
brelse(new_fe_bh);
brelse(de_bh);
brelse(parent_fe_bh);
kfree(si.name);
kfree(si.value);
if ((status < 0) && inode)
if ((status < 0) && inode) {
clear_nlink(inode);
iput(inode);
}
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
@ -341,6 +439,9 @@ static int ocfs2_mknod(struct inode *dir,
if (data_ac)
ocfs2_free_alloc_context(data_ac);
if (xattr_ac)
ocfs2_free_alloc_context(xattr_ac);
mlog_exit(status);
return status;
@ -348,12 +449,12 @@ static int ocfs2_mknod(struct inode *dir,
static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir,
struct dentry *dentry, int mode,
struct inode *inode,
struct dentry *dentry,
dev_t dev,
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
handle_t *handle,
struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac)
{
int status = 0;
@ -361,14 +462,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct ocfs2_extent_list *fel;
u64 fe_blkno = 0;
u16 suballoc_bit;
struct inode *inode = NULL;
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len,
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
inode->i_mode, (unsigned long)dev, dentry->d_name.len,
dentry->d_name.name);
*new_fe_bh = NULL;
*ret_inode = NULL;
status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
&fe_blkno);
@ -377,23 +476,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
goto leave;
}
inode = new_inode(dir->i_sb);
if (!inode) {
status = -ENOMEM;
mlog(ML_ERROR, "new_inode failed!\n");
goto leave;
}
/* populate as many fields early on as possible - many of
* these are used by the support functions here and in
* callers. */
inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
OCFS2_I(inode)->ip_blkno = fe_blkno;
if (S_ISDIR(mode))
inode->i_nlink = 2;
else
inode->i_nlink = 1;
inode->i_mode = mode;
spin_lock(&osb->osb_lock);
inode->i_generation = osb->s_next_generation++;
spin_unlock(&osb->osb_lock);
@ -406,8 +493,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
}
ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
status = ocfs2_journal_access(handle, inode, *new_fe_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -421,17 +508,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_blkno = cpu_to_le64(fe_blkno);
fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
fe->i_uid = cpu_to_le32(current_fsuid());
if (dir->i_mode & S_ISGID) {
fe->i_gid = cpu_to_le32(dir->i_gid);
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
fe->i_gid = cpu_to_le32(current_fsgid());
fe->i_mode = cpu_to_le16(mode);
if (S_ISCHR(mode) || S_ISBLK(mode))
fe->i_uid = cpu_to_le32(inode->i_uid);
fe->i_gid = cpu_to_le32(inode->i_gid);
fe->i_mode = cpu_to_le16(inode->i_mode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
fe->i_links_count = cpu_to_le16(inode->i_nlink);
fe->i_last_eb_blk = 0;
@ -446,7 +527,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
/*
* If supported, directories start with inline data.
*/
if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
u16 feat = le16_to_cpu(fe->i_dyn_features);
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
@ -465,15 +546,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
goto leave;
}
if (ocfs2_populate_inode(inode, fe, 1) < 0) {
mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
"i_blkno=%llu, i_ino=%lu\n",
(unsigned long long)(*new_fe_bh)->b_blocknr,
(unsigned long long)le64_to_cpu(fe->i_blkno),
inode->i_ino);
BUG();
}
ocfs2_populate_inode(inode, fe, 1);
ocfs2_inode_set_new(osb, inode);
if (!ocfs2_mount_local(osb)) {
status = ocfs2_create_new_inode_locks(inode);
@ -484,17 +557,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
status = 0; /* error in ocfs2_create_new_inode_locks is not
* critical */
*ret_inode = inode;
leave:
if (status < 0) {
if (*new_fe_bh) {
brelse(*new_fe_bh);
*new_fe_bh = NULL;
}
if (inode) {
clear_nlink(inode);
iput(inode);
}
}
mlog_exit(status);
@ -588,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out_unlock_inode;
}
handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
handle = NULL;
@ -596,8 +664,8 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out_unlock_inode;
}
err = ocfs2_journal_access(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
err = ocfs2_journal_access_di(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (err < 0) {
mlog_errno(err);
goto out_commit;
@ -775,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir,
}
}
handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@ -783,8 +851,8 @@ static int ocfs2_unlink(struct inode *dir,
goto leave;
}
status = ocfs2_journal_access(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -1181,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir,
}
}
handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@ -1197,8 +1265,8 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
}
status = ocfs2_journal_access(handle, new_inode, newfe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, new_inode, newfe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -1244,8 +1312,8 @@ static int ocfs2_rename(struct inode *old_dir,
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status >= 0) {
old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
@ -1321,9 +1389,9 @@ static int ocfs2_rename(struct inode *old_dir,
(int)old_dir_nlink, old_dir->i_nlink);
} else {
struct ocfs2_dinode *fe;
status = ocfs2_journal_access(handle, old_dir,
old_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, old_dir,
old_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
status = ocfs2_journal_dirty(handle, old_dir_bh);
@ -1496,6 +1564,13 @@ static int ocfs2_symlink(struct inode *dir,
handle_t *handle = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *xattr_ac = NULL;
int want_clusters = 0;
int xattr_credits = 0;
struct ocfs2_security_xattr_info si = {
.enable = 1,
};
int did_quota = 0, did_quota_inode = 0;
mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
dentry, symname, dentry->d_name.len, dentry->d_name.name);
@ -1542,17 +1617,46 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
/* don't reserve bitmap space for fast symlinks. */
if (l > ocfs2_fast_symlink_chars(sb)) {
status = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO);
if (!inode) {
status = -ENOMEM;
mlog_errno(status);
goto bail;
}
/* get security xattr */
status = ocfs2_init_security_get(inode, dir, &si);
if (status) {
if (status == -EOPNOTSUPP)
si.enable = 0;
else {
mlog_errno(status);
goto bail;
}
}
handle = ocfs2_start_trans(osb, credits);
/* calculate meta data/clusters for setting security xattr */
if (si.enable) {
status = ocfs2_calc_security_init(dir, &si, &want_clusters,
&xattr_credits, &xattr_ac);
if (status < 0) {
mlog_errno(status);
goto bail;
}
}
/* don't reserve bitmap space for fast symlinks. */
if (l > ocfs2_fast_symlink_chars(sb))
want_clusters += 1;
status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
goto bail;
}
handle = ocfs2_start_trans(osb, credits + xattr_credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@ -1560,10 +1664,18 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
status = ocfs2_mknod_locked(osb, dir, dentry,
S_IFLNK | S_IRWXUGO, 0,
&new_fe_bh, parent_fe_bh, handle,
&inode, inode_ac);
/* We don't use standard VFS wrapper because we don't want vfs_dq_init
* to be called. */
if (sb_any_quota_active(osb->sb) &&
osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
status = -EDQUOT;
goto bail;
}
did_quota_inode = 1;
status = ocfs2_mknod_locked(osb, dir, inode, dentry,
0, &new_fe_bh, parent_fe_bh, handle,
inode_ac);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -1576,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir,
u32 offset = 0;
inode->i_op = &ocfs2_symlink_inode_operations;
if (vfs_dq_alloc_space_nodirty(inode,
ocfs2_clusters_to_bytes(osb->sb, 1))) {
status = -EDQUOT;
goto bail;
}
did_quota = 1;
status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
@ -1614,6 +1732,15 @@ static int ocfs2_symlink(struct inode *dir,
}
}
if (si.enable) {
status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
xattr_ac, data_ac);
if (status < 0) {
mlog_errno(status);
goto bail;
}
}
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
de_bh);
@ -1632,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir,
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
bail:
if (status < 0 && did_quota)
vfs_dq_free_space_nodirty(inode,
ocfs2_clusters_to_bytes(osb->sb, 1));
if (status < 0 && did_quota_inode)
vfs_dq_free_inode(inode);
if (handle)
ocfs2_commit_trans(osb, handle);
@ -1640,12 +1772,18 @@ static int ocfs2_symlink(struct inode *dir,
brelse(new_fe_bh);
brelse(parent_fe_bh);
brelse(de_bh);
kfree(si.name);
kfree(si.value);
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
if (data_ac)
ocfs2_free_alloc_context(data_ac);
if ((status < 0) && inode)
if (xattr_ac)
ocfs2_free_alloc_context(xattr_ac);
if ((status < 0) && inode) {
clear_nlink(inode);
iput(inode);
}
mlog_exit(status);
@ -1754,16 +1892,14 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
status = ocfs2_read_block(orphan_dir_inode,
OCFS2_I(orphan_dir_inode)->ip_blkno,
&orphan_dir_bh);
status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -1850,8 +1986,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
goto leave;
}
status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;

View file

@ -161,6 +161,7 @@ enum ocfs2_vol_state
{
VOLUME_INIT = 0,
VOLUME_MOUNTED,
VOLUME_MOUNTED_QUOTAS,
VOLUME_DISMOUNTED,
VOLUME_DISABLED
};
@ -195,6 +196,9 @@ enum ocfs2_mount_options
OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@ -205,6 +209,7 @@ enum ocfs2_mount_options
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_quota_recovery;
struct ocfs2_super
{
struct task_struct *commit_task;
@ -286,10 +291,11 @@ struct ocfs2_super
char *local_alloc_debug_buf;
#endif
/* Next two fields are for local node slot recovery during
/* Next three fields are for local node slot recovery during
* mount. */
int dirty;
struct ocfs2_dinode *local_alloc_copy;
struct ocfs2_quota_recovery *quota_rec;
struct ocfs2_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
@ -333,6 +339,10 @@ struct ocfs2_super
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
/* Useful typedef for passing around journal access functions */
typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int type);
static inline int ocfs2_should_order_data(struct inode *inode)
{
if (!S_ISREG(inode->i_mode))
@ -376,6 +386,13 @@ static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
return 0;
}
static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
{
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC)
return 1;
return 0;
}
/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock
@ -443,39 +460,19 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
#define OCFS2_IS_VALID_DINODE(ptr) \
(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \
typeof(__di) ____di = (__di); \
ocfs2_error((__sb), \
"Dinode # %llu has bad signature %.*s", \
(unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \
(____di)->i_signature); \
} while (0)
#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \
(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \
typeof(__eb) ____eb = (__eb); \
ocfs2_error((__sb), \
"Extent Block # %llu has bad signature %.*s", \
(unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \
(____eb)->h_signature); \
} while (0)
#define OCFS2_IS_VALID_GROUP_DESC(ptr) \
(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \
typeof(__gd) ____gd = (__gd); \
ocfs2_error((__sb), \
"Group Descriptor # %llu has bad signature %.*s", \
(unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \
(____gd)->bg_signature); \
} while (0)
#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \
(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
#define OCFS2_IS_VALID_DIR_TRAILER(ptr) \
(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
static inline unsigned long ino_from_blkno(struct super_block *sb,
u64 blkno)
{
@ -632,5 +629,6 @@ static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
#define ocfs2_clear_bit ext2_clear_bit
#define ocfs2_test_bit ext2_test_bit
#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
#define ocfs2_find_next_bit ext2_find_next_bit
#endif /* OCFS2_H */

View file

@ -65,6 +65,7 @@
#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
/* Compatibility flags */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@ -93,8 +94,11 @@
| OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR)
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
| OCFS2_FEATURE_INCOMPAT_XATTR \
| OCFS2_FEATURE_INCOMPAT_META_ECC)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
/*
* Heartbeat-only devices are missing journals and other files. The
@ -147,6 +151,9 @@
/* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
/* Metadata checksum and error correction */
#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800
/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
@ -163,6 +170,12 @@
*/
#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
/*
* Maintain quota information for this filesystem
*/
#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
/* The byte offset of the first backup block will be 1G.
* The following will be 4G, 16G, 64G, 256G and 1T.
*/
@ -192,6 +205,7 @@
#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
/*
* Flags on ocfs2_dinode.i_dyn_features
@ -329,13 +343,17 @@ enum {
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE
USER_QUOTA_SYSTEM_INODE,
GROUP_QUOTA_SYSTEM_INODE,
#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
ORPHAN_DIR_SYSTEM_INODE,
EXTENT_ALLOC_SYSTEM_INODE,
INODE_ALLOC_SYSTEM_INODE,
JOURNAL_SYSTEM_INODE,
LOCAL_ALLOC_SYSTEM_INODE,
TRUNCATE_LOG_SYSTEM_INODE,
LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE,
NUM_SYSTEM_INODES
};
@ -349,6 +367,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
[SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 },
[HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
[GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 },
[USER_QUOTA_SYSTEM_INODE] = { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 },
[GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 },
/* Slot-specific system inodes (one copy per slot) */
[ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 },
@ -356,7 +376,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
[INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
[JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
[LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
[TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }
[TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 },
[LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
[LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
};
/* Parameter passed from mount.ocfs2 to module */
@ -409,6 +431,22 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
*/
#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
/*
* Block checking structure. This is used in metadata to validate the
* contents. If OCFS2_FEATURE_INCOMPAT_META_ECC is not set, it is all
* zeros.
*/
struct ocfs2_block_check {
/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */
__le16 bc_ecc; /* Single-error-correction parity vector.
This is a simple Hamming code dependant
on the blocksize. OCFS2's maximum
blocksize, 4K, requires 16 parity bits,
so we fit in __le16. */
__le16 bc_reserved1;
/*08*/
};
/*
* On disk extent record for OCFS2
* It describes a range of clusters on disk.
@ -496,7 +534,7 @@ struct ocfs2_truncate_log {
struct ocfs2_extent_block
{
/*00*/ __u8 h_signature[8]; /* Signature for verification */
__le64 h_reserved1;
struct ocfs2_block_check h_check; /* Error checking */
/*10*/ __le16 h_suballoc_slot; /* Slot suballocator this
extent_header belongs to */
__le16 h_suballoc_bit; /* Bit offset in suballocator
@ -666,7 +704,8 @@ struct ocfs2_dinode {
was set in i_flags */
__le16 i_dyn_features;
__le64 i_xattr_loc;
/*80*/ __le64 i_reserved2[7];
/*80*/ struct ocfs2_block_check i_check; /* Error checking */
/*88*/ __le64 i_reserved2[6];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
@ -714,6 +753,34 @@ struct ocfs2_dir_entry {
/* Actual on-disk length specified by rec_len */
} __attribute__ ((packed));
/*
* Per-block record for the unindexed directory btree. This is carefully
* crafted so that the rec_len and name_len records of an ocfs2_dir_entry are
* mirrored. That way, the directory manipulation code needs a minimal amount
* of update.
*
* NOTE: Keep this structure aligned to a multiple of 4 bytes.
*/
struct ocfs2_dir_block_trailer {
/*00*/ __le64 db_compat_inode; /* Always zero. Was inode */
__le16 db_compat_rec_len; /* Backwards compatible with
* ocfs2_dir_entry. */
__u8 db_compat_name_len; /* Always zero. Was name_len */
__u8 db_reserved0;
__le16 db_reserved1;
__le16 db_free_rec_len; /* Size of largest empty hole
* in this block. (unused) */
/*10*/ __u8 db_signature[8]; /* Signature for verification */
__le64 db_reserved2;
__le64 db_free_next; /* Next block in list (unused) */
/*20*/ __le64 db_blkno; /* Offset on disk, in blocks */
__le64 db_parent_dinode; /* dinode which owns me, in
blocks */
/*30*/ struct ocfs2_block_check db_check; /* Error checking */
/*40*/
};
/*
* On disk allocator group structure for OCFS2
*/
@ -733,7 +800,8 @@ struct ocfs2_group_desc
/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in
blocks */
__le64 bg_blkno; /* Offset on disk, in blocks */
/*30*/ __le64 bg_reserved2[2];
/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
__le64 bg_reserved2;
/*40*/ __u8 bg_bitmap[0];
};
@ -776,7 +844,12 @@ struct ocfs2_xattr_header {
in this extent record,
only valid in the first
bucket. */
__le64 xh_csum;
struct ocfs2_block_check xh_check; /* Error checking
(Note, this is only
used for xattr
buckets. A block uses
xb_check and sets
this field to zero.) */
struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
};
@ -827,7 +900,7 @@ struct ocfs2_xattr_block {
block group */
__le32 xb_fs_generation; /* Must match super block */
/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */
__le64 xb_csum;
struct ocfs2_block_check xb_check; /* Error checking */
/*20*/ __le16 xb_flags; /* Indicates whether this block contains
real xattr or a xattr tree. */
__le16 xb_reserved0;
@ -868,6 +941,128 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
}
/*
* On disk structures for global quota file
*/
/* Magic numbers and known versions for global quota files */
#define OCFS2_GLOBAL_QMAGICS {\
0x0cf52470, /* USRQUOTA */ \
0x0cf52471 /* GRPQUOTA */ \
}
#define OCFS2_GLOBAL_QVERSIONS {\
0, \
0, \
}
/* Each block of each quota file has a certain fixed number of bytes reserved
* for OCFS2 internal use at its end. OCFS2 can use it for things like
* checksums, etc. */
#define OCFS2_QBLK_RESERVED_SPACE 8
/* Generic header of all quota files */
struct ocfs2_disk_dqheader {
__le32 dqh_magic; /* Magic number identifying file */
__le32 dqh_version; /* Quota format version */
};
#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
/* Information header of global quota file (immediately follows the generic
* header) */
struct ocfs2_global_disk_dqinfo {
/*00*/ __le32 dqi_bgrace; /* Grace time for space softlimit excess */
__le32 dqi_igrace; /* Grace time for inode softlimit excess */
__le32 dqi_syncms; /* Time after which we sync local changes to
* global quota file */
__le32 dqi_blocks; /* Number of blocks in quota file */
/*10*/ __le32 dqi_free_blk; /* First free block in quota file */
__le32 dqi_free_entry; /* First block with free dquot entry in quota
* file */
};
/* Structure with global user / group information. We reserve some space
* for future use. */
struct ocfs2_global_disk_dqblk {
/*00*/ __le32 dqb_id; /* ID the structure belongs to */
__le32 dqb_use_count; /* Number of nodes having reference to this structure */
__le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
/*10*/ __le64 dqb_isoftlimit; /* preferred inode limit */
__le64 dqb_curinodes; /* current # allocated inodes */
/*20*/ __le64 dqb_bhardlimit; /* absolute limit on disk space */
__le64 dqb_bsoftlimit; /* preferred limit on disk space */
/*30*/ __le64 dqb_curspace; /* current space occupied */
__le64 dqb_btime; /* time limit for excessive disk use */
/*40*/ __le64 dqb_itime; /* time limit for excessive inode use */
__le64 dqb_pad1;
/*50*/ __le64 dqb_pad2;
};
/*
* On-disk structures for local quota file
*/
/* Magic numbers and known versions for local quota files */
#define OCFS2_LOCAL_QMAGICS {\
0x0cf524c0, /* USRQUOTA */ \
0x0cf524c1 /* GRPQUOTA */ \
}
#define OCFS2_LOCAL_QVERSIONS {\
0, \
0, \
}
/* Quota flags in dqinfo header */
#define OLQF_CLEAN 0x0001 /* Quota file is empty (this should be after\
* quota has been cleanly turned off) */
#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
/* Information header of local quota file (immediately follows the generic
* header) */
struct ocfs2_local_disk_dqinfo {
__le32 dqi_flags; /* Flags for quota file */
__le32 dqi_chunks; /* Number of chunks of quota structures
* with a bitmap */
__le32 dqi_blocks; /* Number of blocks allocated for quota file */
};
/* Header of one chunk of a quota file */
struct ocfs2_local_disk_chunk {
__le32 dqc_free; /* Number of free entries in the bitmap */
u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
* chunk of quota file */
};
/* One entry in local quota file */
struct ocfs2_local_disk_dqblk {
/*00*/ __le64 dqb_id; /* id this quota applies to */
__le64 dqb_spacemod; /* Change in the amount of used space */
/*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */
};
/*
* The quota trailer lives at the end of each quota block.
*/
struct ocfs2_disk_dqtrailer {
/*00*/ struct ocfs2_block_check dq_check; /* Error checking */
/*08*/ /* Cannot be larger than OCFS2_QBLK_RESERVED_SPACE */
};
static inline struct ocfs2_disk_dqtrailer *ocfs2_block_dqtrailer(int blocksize,
void *buf)
{
char *ptr = buf;
ptr += blocksize - OCFS2_QBLK_RESERVED_SPACE;
return (struct ocfs2_disk_dqtrailer *)ptr;
}
#ifdef __KERNEL__
static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
{

View file

@ -1,82 +0,0 @@
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* ocfs2_jbd_compat.h
*
* Compatibility defines for JBD.
*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_JBD_COMPAT_H
#define OCFS2_JBD_COMPAT_H
#ifndef CONFIG_OCFS2_COMPAT_JBD
# error Should not have been included
#endif
struct jbd2_inode {
unsigned int dummy;
};
#define JBD2_BARRIER JFS_BARRIER
#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
#define jbd2_journal_ack_err journal_ack_err
#define jbd2_journal_clear_err journal_clear_err
#define jbd2_journal_destroy journal_destroy
#define jbd2_journal_dirty_metadata journal_dirty_metadata
#define jbd2_journal_errno journal_errno
#define jbd2_journal_extend journal_extend
#define jbd2_journal_flush journal_flush
#define jbd2_journal_force_commit journal_force_commit
#define jbd2_journal_get_write_access journal_get_write_access
#define jbd2_journal_get_undo_access journal_get_undo_access
#define jbd2_journal_init_inode journal_init_inode
#define jbd2_journal_invalidatepage journal_invalidatepage
#define jbd2_journal_load journal_load
#define jbd2_journal_lock_updates journal_lock_updates
#define jbd2_journal_restart journal_restart
#define jbd2_journal_start journal_start
#define jbd2_journal_start_commit journal_start_commit
#define jbd2_journal_stop journal_stop
#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
#define jbd2_journal_unlock_updates journal_unlock_updates
#define jbd2_journal_wipe journal_wipe
#define jbd2_log_wait_commit log_wait_commit
static inline int jbd2_journal_file_inode(handle_t *handle,
struct jbd2_inode *inode)
{
return 0;
}
static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
loff_t new_size)
{
return 0;
}
static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
struct inode *inode)
{
return;
}
static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
struct jbd2_inode *jinode)
{
return;
}
#endif /* OCFS2_JBD_COMPAT_H */

View file

@ -46,6 +46,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_DENTRY,
OCFS2_LOCK_TYPE_OPEN,
OCFS2_LOCK_TYPE_FLOCK,
OCFS2_LOCK_TYPE_QINFO,
OCFS2_NUM_LOCK_TYPES
};
@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_FLOCK:
c = 'F';
break;
case OCFS2_LOCK_TYPE_QINFO:
c = 'Q';
break;
default:
c = '\0';
}
@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
[OCFS2_LOCK_TYPE_OPEN] = "Open",
[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
[OCFS2_LOCK_TYPE_QINFO] = "Quota",
};
static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)

119
fs/ocfs2/quota.h Normal file
View file

@ -0,0 +1,119 @@
/*
* quota.h for OCFS2
*
* On disk quota structures for local and global quota file, in-memory
* structures.
*
*/
#ifndef _OCFS2_QUOTA_H
#define _OCFS2_QUOTA_H
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/quota.h>
#include <linux/list.h>
#include <linux/dqblk_qtree.h>
#include "ocfs2.h"
/* Common stuff */
/* id number of quota format */
#define QFMT_OCFS2 3
/*
* In-memory structures
*/
struct ocfs2_dquot {
struct dquot dq_dquot; /* Generic VFS dquot */
loff_t dq_local_off; /* Offset in the local quota file */
struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */
unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
s64 dq_origspace; /* Last globally synced space usage */
s64 dq_originodes; /* Last globally synced inode usage */
};
/* Description of one chunk to recover in memory */
struct ocfs2_recovery_chunk {
struct list_head rc_list; /* List of chunks */
int rc_chunk; /* Chunk number */
unsigned long *rc_bitmap; /* Bitmap of entries to recover */
};
struct ocfs2_quota_recovery {
struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */
};
/* In-memory structure with quota header information */
struct ocfs2_mem_dqinfo {
unsigned int dqi_type; /* Quota type this structure describes */
unsigned int dqi_chunks; /* Number of chunks in local quota file */
unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
unsigned int dqi_syncms; /* How often should we sync with other nodes */
unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
struct list_head dqi_chunk; /* List of chunks */
struct inode *dqi_gqinode; /* Global quota file inode */
struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */
int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */
struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */
struct buffer_head *dqi_ibh; /* Buffer with information header */
struct qtree_mem_dqinfo dqi_gi; /* Info about global file */
struct delayed_work dqi_sync_work; /* Work for syncing dquots */
struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery
* information, in case we
* enable quotas on file
* needing it */
};
static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
{
return container_of(dquot, struct ocfs2_dquot, dq_dquot);
}
struct ocfs2_quota_chunk {
struct list_head qc_chunk; /* List of quotafile chunks */
int qc_num; /* Number of quota chunk */
struct buffer_head *qc_headerbh; /* Buffer head with chunk header */
};
extern struct kmem_cache *ocfs2_dquot_cachep;
extern struct kmem_cache *ocfs2_qf_chunk_cachep;
extern struct qtree_fmt_operations ocfs2_global_ops;
struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
struct ocfs2_super *osb, int slot_num);
int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
struct ocfs2_quota_recovery *rec,
int slot_num);
void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec);
ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
ssize_t ocfs2_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
int ocfs2_global_read_info(struct super_block *sb, int type);
int ocfs2_global_write_info(struct super_block *sb, int type);
int ocfs2_global_read_dquot(struct dquot *dquot);
int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
static inline int ocfs2_sync_dquot(struct dquot *dquot)
{
return __ocfs2_sync_dquot(dquot, 0);
}
static inline int ocfs2_global_release_dquot(struct dquot *dquot)
{
return __ocfs2_sync_dquot(dquot, 1);
}
int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
struct buffer_head **bh);
extern struct dquot_operations ocfs2_quota_operations;
extern struct quota_format_type ocfs2_quota_format;
int ocfs2_quota_setup(void);
void ocfs2_quota_shutdown(void);
#endif /* _OCFS2_QUOTA_H */

1025
fs/ocfs2/quota_global.c Normal file

File diff suppressed because it is too large Load diff

1253
fs/ocfs2/quota_local.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
new_clusters, first_new_cluster);
ret = ocfs2_journal_access(handle, bm_inode, group_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
goto out;
@ -141,8 +141,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
}
/* update the inode accordingly. */
ret = ocfs2_journal_access(handle, bm_inode, bm_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
goto out_rollback;
@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
/* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
* so any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
ocfs2_group_bitmap_size(osb->sb) * 8) {
mlog(ML_ERROR, "The disk is too old and small. "
@ -322,30 +326,18 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
goto out_unlock;
}
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
ret = -EIO;
goto out_unlock;
}
first_new_cluster = le32_to_cpu(fe->i_clusters);
lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
first_new_cluster - 1);
ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
&group_bh);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
group = (struct ocfs2_group_desc *)group_bh->b_data;
ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
if (ret) {
mlog_errno(ret);
goto out_unlock;
}
cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
@ -398,41 +390,16 @@ static int ocfs2_check_new_group(struct inode *inode,
struct buffer_head *group_bh)
{
int ret;
struct ocfs2_group_desc *gd;
struct ocfs2_group_desc *gd =
(struct ocfs2_group_desc *)group_bh->b_data;
u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
le16_to_cpu(di->id2.i_chain.cl_bpc);
ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
if (ret)
goto out;
gd = (struct ocfs2_group_desc *)group_bh->b_data;
ret = -EIO;
if (!OCFS2_IS_VALID_GROUP_DESC(gd))
mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno));
else if (di->i_blkno != gd->bg_parent_dinode)
mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
"pointer (%llu, expected %llu)\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
(unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
(unsigned long long)le64_to_cpu(di->i_blkno));
else if (le16_to_cpu(gd->bg_bits) > max_bits)
mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
le16_to_cpu(gd->bg_bits));
else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
"claims that %u are free\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
le16_to_cpu(gd->bg_bits),
le16_to_cpu(gd->bg_free_bits_count));
else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
"max bitmap bits of %u\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
le16_to_cpu(gd->bg_bits),
8 * le16_to_cpu(gd->bg_size));
else if (le16_to_cpu(gd->bg_chain) != input->chain)
ret = -EINVAL;
if (le16_to_cpu(gd->bg_chain) != input->chain)
mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
"while input has %u set.\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
@ -451,6 +418,7 @@ static int ocfs2_check_new_group(struct inode *inode,
else
ret = 0;
out:
return ret;
}
@ -568,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
cl = &fe->id2.i_chain;
cr = &cl->cl_recs[input->chain];
ret = ocfs2_journal_access(handle, main_bm_inode, group_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
goto out_commit;
@ -584,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
goto out_commit;
}
ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
goto out_commit;

View file

@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
* this is not true, the read of -1 (UINT64_MAX) will fail.
*/
ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
OCFS2_BH_IGNORE_CACHE);
OCFS2_BH_IGNORE_CACHE, NULL);
if (ret == 0) {
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
bh = NULL; /* Acquire a fresh bh */
status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
OCFS2_BH_IGNORE_CACHE, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;

View file

@ -35,6 +35,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "inode.h"
#include "journal.h"
@ -145,64 +146,185 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
}
/* somewhat more expensive than our other checks, so use sparingly. */
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct ocfs2_group_desc *gd)
#define do_error(fmt, ...) \
do{ \
if (clean_error) \
mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \
else \
ocfs2_error(sb, fmt, ##__VA_ARGS__); \
} while (0)
static int ocfs2_validate_gd_self(struct super_block *sb,
struct buffer_head *bh,
int clean_error)
{
unsigned int max_bits;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
return -EIO;
do_error("Group descriptor #%llu has bad signature %.*s",
(unsigned long long)bh->b_blocknr, 7,
gd->bg_signature);
return -EINVAL;
}
if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
do_error("Group descriptor #%llu has an invalid bg_blkno "
"of %llu",
(unsigned long long)bh->b_blocknr,
(unsigned long long)le64_to_cpu(gd->bg_blkno));
return -EINVAL;
}
if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
do_error("Group descriptor #%llu has an invalid "
"fs_generation of #%u",
(unsigned long long)bh->b_blocknr,
le32_to_cpu(gd->bg_generation));
return -EINVAL;
}
if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
do_error("Group descriptor #%llu has bit count %u but "
"claims that %u are free",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_bits),
le16_to_cpu(gd->bg_free_bits_count));
return -EINVAL;
}
if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
do_error("Group descriptor #%llu has bit count %u but "
"max bitmap bits of %u",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_bits),
8 * le16_to_cpu(gd->bg_size));
return -EINVAL;
}
return 0;
}
static int ocfs2_validate_gd_parent(struct super_block *sb,
struct ocfs2_dinode *di,
struct buffer_head *bh,
int clean_error)
{
unsigned int max_bits;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
if (di->i_blkno != gd->bg_parent_dinode) {
ocfs2_error(sb, "Group descriptor # %llu has bad parent "
"pointer (%llu, expected %llu)",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
(unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
(unsigned long long)le64_to_cpu(di->i_blkno));
return -EIO;
do_error("Group descriptor #%llu has bad parent "
"pointer (%llu, expected %llu)",
(unsigned long long)bh->b_blocknr,
(unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
(unsigned long long)le64_to_cpu(di->i_blkno));
return -EINVAL;
}
max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
if (le16_to_cpu(gd->bg_bits) > max_bits) {
ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
le16_to_cpu(gd->bg_bits));
return -EIO;
do_error("Group descriptor #%llu has bit count of %u",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_bits));
return -EINVAL;
}
if (le16_to_cpu(gd->bg_chain) >=
le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
le16_to_cpu(gd->bg_chain));
return -EIO;
}
if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
"claims that %u are free",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
le16_to_cpu(gd->bg_bits),
le16_to_cpu(gd->bg_free_bits_count));
return -EIO;
}
if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
"max bitmap bits of %u",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
le16_to_cpu(gd->bg_bits),
8 * le16_to_cpu(gd->bg_size));
return -EIO;
do_error("Group descriptor #%llu has bad chain %u",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_chain));
return -EINVAL;
}
return 0;
}
#undef do_error
/*
* This version only prints errors. It does not fail the filesystem, and
* exists only for resize.
*/
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct buffer_head *bh)
{
int rc;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
BUG_ON(!buffer_uptodate(bh));
/*
* If the ecc fails, we return the error but otherwise
* leave the filesystem running. We know any error is
* local to this block.
*/
rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
if (rc) {
mlog(ML_ERROR,
"Checksum failed for group descriptor %llu\n",
(unsigned long long)bh->b_blocknr);
} else
rc = ocfs2_validate_gd_self(sb, bh, 1);
if (!rc)
rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
return rc;
}
static int ocfs2_validate_group_descriptor(struct super_block *sb,
struct buffer_head *bh)
{
int rc;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
mlog(0, "Validating group descriptor %llu\n",
(unsigned long long)bh->b_blocknr);
BUG_ON(!buffer_uptodate(bh));
/*
* If the ecc fails, we return the error but otherwise
* leave the filesystem running. We know any error is
* local to this block.
*/
rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
if (rc)
return rc;
/*
* Errors after here are fatal.
*/
return ocfs2_validate_gd_self(sb, bh, 0);
}
int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
u64 gd_blkno, struct buffer_head **bh)
{
int rc;
struct buffer_head *tmp = *bh;
rc = ocfs2_read_block(inode, gd_blkno, &tmp,
ocfs2_validate_group_descriptor);
if (rc)
goto out;
rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
if (rc) {
brelse(tmp);
goto out;
}
/* If ocfs2_read_block() got us a new bh, pass it up. */
if (!*bh)
*bh = tmp;
out:
return rc;
}
static int ocfs2_block_group_fill(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *bg_bh,
@ -225,10 +347,10 @@ static int ocfs2_block_group_fill(handle_t *handle,
goto bail;
}
status = ocfs2_journal_access(handle,
alloc_inode,
bg_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
status = ocfs2_journal_access_gd(handle,
alloc_inode,
bg_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -358,8 +480,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
bg = (struct ocfs2_group_desc *) bg_bh->b_data;
status = ocfs2_journal_access(handle, alloc_inode,
bh, OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, alloc_inode,
bh, OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -441,11 +563,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
ac->ac_alloc_slot = slot;
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
status = -EIO;
goto bail;
}
/* The bh was validated by the inode read inside
* ocfs2_inode_lock(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
(unsigned long long)le64_to_cpu(fe->i_blkno));
@ -790,10 +912,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
int offset, start, found, status = 0;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
return -EIO;
}
/* Callers got this descriptor from
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
@ -858,11 +979,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
mlog_entry_void();
if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
status = -EIO;
goto bail;
}
/* All callers get the descriptor via
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
@ -871,10 +990,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
if (ocfs2_is_cluster_bitmap(alloc_inode))
journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
status = ocfs2_journal_access(handle,
alloc_inode,
group_bh,
journal_type);
status = ocfs2_journal_access_gd(handle,
alloc_inode,
group_bh,
journal_type);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -931,21 +1050,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
status = -EIO;
goto out;
}
if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
status = -EIO;
goto out;
}
if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
status = -EIO;
goto out;
}
/* The caller got these descriptors from
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
(unsigned long long)le64_to_cpu(fe->i_blkno), chain,
@ -956,8 +1064,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
bg_ptr = le64_to_cpu(bg->bg_next_group);
prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto out_rollback;
@ -971,8 +1079,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
goto out_rollback;
}
status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto out_rollback;
@ -986,8 +1094,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
goto out_rollback;
}
status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto out_rollback;
@ -1008,7 +1116,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
bg->bg_next_group = cpu_to_le64(bg_ptr);
prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
}
out:
mlog_exit(status);
return status;
}
@ -1138,8 +1246,8 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
ret = ocfs2_journal_access(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
ret = ocfs2_journal_access_di(handle, inode, di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
goto out;
@ -1170,21 +1278,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
u16 found;
struct buffer_head *group_bh = NULL;
struct ocfs2_group_desc *gd;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
struct inode *alloc_inode = ac->ac_inode;
ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
&group_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
gd = (struct ocfs2_group_desc *) group_bh->b_data;
if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
ret = -EIO;
goto out;
}
ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
ac->ac_max_block, bit_off, &found);
if (ret < 0) {
@ -1241,19 +1345,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
bits_wanted, chain,
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
status = ocfs2_read_block(alloc_inode,
le64_to_cpu(cl->cl_recs[chain].c_blkno),
&group_bh);
status = ocfs2_read_group_descriptor(alloc_inode, fe,
le64_to_cpu(cl->cl_recs[chain].c_blkno),
&group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
if (status) {
mlog_errno(status);
goto bail;
}
status = -ENOSPC;
/* for now, the chain search is a bit simplistic. We just use
@ -1271,18 +1370,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
next_group = le64_to_cpu(bg->bg_next_group);
prev_group_bh = group_bh;
group_bh = NULL;
status = ocfs2_read_block(alloc_inode,
next_group, &group_bh);
status = ocfs2_read_group_descriptor(alloc_inode, fe,
next_group, &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
if (status) {
mlog_errno(status);
goto bail;
}
}
if (status < 0) {
if (status != -ENOSPC)
@ -1324,10 +1418,10 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
/* Ok, claim our bits now: set the info on dinode, chainlist
* and then the group */
status = ocfs2_journal_access(handle,
alloc_inode,
ac->ac_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle,
alloc_inode,
ac->ac_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -1392,11 +1486,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
BUG_ON(!ac->ac_bh);
fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe);
status = -EIO;
goto bail;
}
/* The bh was validated by the inode read during
* ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
le32_to_cpu(fe->id1.bitmap1.i_total)) {
ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
@ -1725,19 +1819,17 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
mlog_entry_void();
if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
status = -EIO;
goto bail;
}
/* The caller got this descriptor from
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
if (ocfs2_is_cluster_bitmap(alloc_inode))
journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
status = ocfs2_journal_access(handle, alloc_inode, group_bh,
journal_type);
status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh,
journal_type);
if (status < 0) {
mlog_errno(status);
goto bail;
@ -1782,29 +1874,26 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
mlog_entry_void();
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
status = -EIO;
goto bail;
}
/* The alloc_bh comes from ocfs2_free_dinode() or
* ocfs2_free_clusters(). The callers have all locked the
* allocator and gotten alloc_bh from the lock call. This
* validates the dinode buffer. Any corruption that has happended
* is a code bug. */
BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
(unsigned long long)bg_blkno, start_bit);
status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
&group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
group = (struct ocfs2_group_desc *) group_bh->b_data;
status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
if (status) {
mlog_errno(status);
goto bail;
}
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
status = ocfs2_block_group_clear_bits(handle, alloc_inode,
@ -1815,8 +1904,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
goto bail;
}
status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;

View file

@ -164,10 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
* and return that block offset. */
u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
/* somewhat more expensive than our other checks, so use sparingly. */
/*
* By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
* finds a problem. A caller that wants to check a group descriptor
* without going readonly should read the block with ocfs2_read_block[s]()
* and then checking it with this function. This is only resize, really.
* Everyone else should be using ocfs2_read_group_descriptor().
*/
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct ocfs2_group_desc *gd);
struct buffer_head *bh);
/*
* Read a group descriptor block into *bh. If *bh is NULL, a bh will be
* allocated. This is a cached read. The descriptor will be validated with
* ocfs2_validate_group_descriptor().
*/
int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
u64 gd_blkno, struct buffer_head **bh);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,

View file

@ -41,6 +41,7 @@
#include <linux/debugfs.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_SUPER
#include <cluster/masklog.h>
@ -51,6 +52,7 @@
#include "ocfs1_fs_compat.h"
#include "alloc.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "export.h"
#include "extent_map.h"
@ -65,10 +67,13 @@
#include "uptodate.h"
#include "ver.h"
#include "xattr.h"
#include "quota.h"
#include "buffer_head_io.h"
static struct kmem_cache *ocfs2_inode_cachep = NULL;
struct kmem_cache *ocfs2_dquot_cachep;
struct kmem_cache *ocfs2_qf_chunk_cachep;
/* OCFS2 needs to schedule several differnt types of work which
* require cluster locking, disk I/O, recovery waits, etc. Since these
@ -124,6 +129,9 @@ static int ocfs2_get_sector(struct super_block *sb,
static void ocfs2_write_super(struct super_block *sb);
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
static void ocfs2_destroy_inode(struct inode *inode);
static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
static int ocfs2_enable_quotas(struct ocfs2_super *osb);
static void ocfs2_disable_quotas(struct ocfs2_super *osb);
static const struct super_operations ocfs2_sops = {
.statfs = ocfs2_statfs,
@ -137,6 +145,8 @@ static const struct super_operations ocfs2_sops = {
.put_super = ocfs2_put_super,
.remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
.quota_read = ocfs2_quota_read,
.quota_write = ocfs2_quota_write,
};
enum {
@ -158,6 +168,10 @@ enum {
Opt_user_xattr,
Opt_nouser_xattr,
Opt_inode64,
Opt_acl,
Opt_noacl,
Opt_usrquota,
Opt_grpquota,
Opt_err,
};
@ -180,6 +194,10 @@ static const match_table_t tokens = {
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_inode64, "inode64"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_usrquota, "usrquota"},
{Opt_grpquota, "grpquota"},
{Opt_err, NULL}
};
@ -221,6 +239,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
return 0;
}
static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino)
{
if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
&& (ino == USER_QUOTA_SYSTEM_INODE
|| ino == LOCAL_USER_QUOTA_SYSTEM_INODE))
return 0;
if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
&& (ino == GROUP_QUOTA_SYSTEM_INODE
|| ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE))
return 0;
return 1;
}
static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
{
struct inode *new = NULL;
@ -247,6 +278,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
if (!ocfs2_need_system_inode(osb, i))
continue;
new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
if (!new) {
ocfs2_release_system_inodes(osb);
@ -277,6 +310,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
i < NUM_SYSTEM_INODES;
i++) {
if (!ocfs2_need_system_inode(osb, i))
continue;
new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
if (!new) {
ocfs2_release_system_inodes(osb);
@ -426,6 +461,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
/* We're going to/from readonly mode. */
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
/* Disable quota accounting before remounting RO */
if (*flags & MS_RDONLY) {
ret = ocfs2_susp_quotas(osb, 0);
if (ret < 0)
goto out;
}
/* Lock here so the check of HARD_RO and the potential
* setting of SOFT_RO is atomic. */
spin_lock(&osb->osb_lock);
@ -461,11 +502,28 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
}
unlock_osb:
spin_unlock(&osb->osb_lock);
/* Enable quota accounting after remounting RW */
if (!ret && !(*flags & MS_RDONLY)) {
if (sb_any_quota_suspended(sb))
ret = ocfs2_susp_quotas(osb, 1);
else
ret = ocfs2_enable_quotas(osb);
if (ret < 0) {
/* Return back changes... */
spin_lock(&osb->osb_lock);
sb->s_flags |= MS_RDONLY;
osb->osb_flags |= OCFS2_OSB_SOFT_RO;
spin_unlock(&osb->osb_lock);
goto out;
}
}
}
if (!ret) {
/* Only save off the new mount options in case of a successful
* remount. */
if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
@ -619,6 +677,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
return 0;
}
static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
{
int type;
struct super_block *sb = osb->sb;
unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
int status = 0;
for (type = 0; type < MAXQUOTAS; type++) {
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
continue;
if (unsuspend)
status = vfs_quota_enable(
sb_dqopt(sb)->files[type],
type, QFMT_OCFS2,
DQUOT_SUSPENDED);
else
status = vfs_quota_disable(sb, type,
DQUOT_SUSPENDED);
if (status < 0)
break;
}
if (status < 0)
mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on "
"remount (error = %d).\n", status);
return status;
}
static int ocfs2_enable_quotas(struct ocfs2_super *osb)
{
struct inode *inode[MAXQUOTAS] = { NULL, NULL };
struct super_block *sb = osb->sb;
unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE };
int status;
int type;
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE;
for (type = 0; type < MAXQUOTAS; type++) {
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
continue;
inode[type] = ocfs2_get_system_file_inode(osb, ino[type],
osb->slot_num);
if (!inode[type]) {
status = -ENOENT;
goto out_quota_off;
}
status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
DQUOT_USAGE_ENABLED);
if (status < 0)
goto out_quota_off;
}
for (type = 0; type < MAXQUOTAS; type++)
iput(inode[type]);
return 0;
out_quota_off:
ocfs2_disable_quotas(osb);
for (type = 0; type < MAXQUOTAS; type++)
iput(inode[type]);
mlog_errno(status);
return status;
}
static void ocfs2_disable_quotas(struct ocfs2_super *osb)
{
int type;
struct inode *inode;
struct super_block *sb = osb->sb;
/* We mostly ignore errors in this function because there's not much
* we can do when we see them */
for (type = 0; type < MAXQUOTAS; type++) {
if (!sb_has_quota_loaded(sb, type))
continue;
inode = igrab(sb->s_dquot.files[type]);
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
* quota files */
vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
DQUOT_LIMITS_ENABLED);
if (!inode)
continue;
iput(inode);
}
}
/* Handle quota on quotactl */
static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
char *path, int remount)
{
unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
return -EINVAL;
if (remount)
return 0; /* Just ignore it has been handled in
* ocfs2_remount() */
return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
format_id, DQUOT_LIMITS_ENABLED);
}
/* Handle quota off quotactl */
static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
{
if (remount)
return 0; /* Ignore now and handle later in
* ocfs2_remount() */
return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
}
static struct quotactl_ops ocfs2_quotactl_ops = {
.quota_on = ocfs2_quota_on,
.quota_off = ocfs2_quota_off,
.quota_sync = vfs_quota_sync,
.get_info = vfs_get_dqinfo,
.set_info = vfs_set_dqinfo,
.get_dqblk = vfs_get_dqblk,
.set_dqblk = vfs_set_dqblk,
};
static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
{
struct dentry *root;
@ -651,12 +834,32 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
}
brelse(bh);
bh = NULL;
if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
osb->osb_commit_interval = parsed_options.commit_interval;
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
osb->local_alloc_bits = osb->local_alloc_default_bits;
if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
!OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
status = -EINVAL;
mlog(ML_ERROR, "User quotas were requested, but this "
"filesystem does not have the feature enabled.\n");
goto read_super_error;
}
if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
!OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
status = -EINVAL;
mlog(ML_ERROR, "Group quotas were requested, but this "
"filesystem does not have the feature enabled.\n");
goto read_super_error;
}
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
if (status)
@ -664,6 +867,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = OCFS2_SUPER_MAGIC;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
* heartbeat=none */
if (bdev_read_only(sb->s_bdev)) {
@ -758,6 +964,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
/* Now we can initialize quotas because we can afford to wait
* for cluster locks recovery now. That also means that truncation
* log recovery can happen but that waits for proper quota setup */
if (!(sb->s_flags & MS_RDONLY)) {
status = ocfs2_enable_quotas(osb);
if (status < 0) {
/* We have to err-out specially here because
* s_root is already set */
mlog_errno(status);
atomic_set(&osb->vol_state, VOLUME_DISABLED);
wake_up(&osb->osb_mount_event);
mlog_exit(status);
return status;
}
}
ocfs2_complete_quota_recovery(osb);
/* Now we wake up again for processes waiting for quotas */
atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
wake_up(&osb->osb_mount_event);
mlog_exit(status);
return status;
@ -945,6 +1173,41 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
break;
case Opt_usrquota:
/* We check only on remount, otherwise features
* aren't yet initialized. */
if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
mlog(ML_ERROR, "User quota requested but "
"filesystem feature is not set\n");
status = 0;
goto bail;
}
mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
break;
case Opt_grpquota:
if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
mlog(ML_ERROR, "Group quota requested but "
"filesystem feature is not set\n");
status = 0;
goto bail;
}
mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
break;
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
case Opt_acl:
mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
break;
case Opt_noacl:
mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
break;
#else
case Opt_acl:
case Opt_noacl:
printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
break;
#endif
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@ -1008,6 +1271,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (osb->osb_cluster_stack[0])
seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
osb->osb_cluster_stack);
if (opts & OCFS2_MOUNT_USRQUOTA)
seq_printf(s, ",usrquota");
if (opts & OCFS2_MOUNT_GRPQUOTA)
seq_printf(s, ",grpquota");
if (opts & OCFS2_MOUNT_NOUSERXATTR)
seq_printf(s, ",nouser_xattr");
@ -1017,6 +1284,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (opts & OCFS2_MOUNT_INODE64)
seq_printf(s, ",inode64");
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
if (opts & OCFS2_MOUNT_POSIX_ACL)
seq_printf(s, ",acl");
else
seq_printf(s, ",noacl");
#endif
return 0;
}
@ -1052,10 +1326,16 @@ static int __init ocfs2_init(void)
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
}
status = ocfs2_quota_setup();
if (status)
goto leave;
ocfs2_set_locking_protocol();
status = register_quota_format(&ocfs2_quota_format);
leave:
if (status < 0) {
ocfs2_quota_shutdown();
ocfs2_free_mem_caches();
exit_ocfs2_uptodate_cache();
}
@ -1072,11 +1352,15 @@ static void __exit ocfs2_exit(void)
{
mlog_entry_void();
ocfs2_quota_shutdown();
if (ocfs2_wq) {
flush_workqueue(ocfs2_wq);
destroy_workqueue(ocfs2_wq);
}
unregister_quota_format(&ocfs2_quota_format);
debugfs_remove(ocfs2_debugfs_root);
ocfs2_free_mem_caches();
@ -1192,8 +1476,27 @@ static int ocfs2_initialize_mem_caches(void)
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
ocfs2_inode_init_once);
if (!ocfs2_inode_cachep)
ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
sizeof(struct ocfs2_dquot),
0,
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
NULL);
ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
sizeof(struct ocfs2_quota_chunk),
0,
(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
NULL);
if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
!ocfs2_qf_chunk_cachep) {
if (ocfs2_inode_cachep)
kmem_cache_destroy(ocfs2_inode_cachep);
if (ocfs2_dquot_cachep)
kmem_cache_destroy(ocfs2_dquot_cachep);
if (ocfs2_qf_chunk_cachep)
kmem_cache_destroy(ocfs2_qf_chunk_cachep);
return -ENOMEM;
}
return 0;
}
@ -1202,8 +1505,15 @@ static void ocfs2_free_mem_caches(void)
{
if (ocfs2_inode_cachep)
kmem_cache_destroy(ocfs2_inode_cachep);
ocfs2_inode_cachep = NULL;
if (ocfs2_dquot_cachep)
kmem_cache_destroy(ocfs2_dquot_cachep);
ocfs2_dquot_cachep = NULL;
if (ocfs2_qf_chunk_cachep)
kmem_cache_destroy(ocfs2_qf_chunk_cachep);
ocfs2_qf_chunk_cachep = NULL;
}
static int ocfs2_get_sector(struct super_block *sb,
@ -1303,6 +1613,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
ocfs2_disable_quotas(osb);
ocfs2_shutdown_local_alloc(osb);
ocfs2_truncate_log_shutdown(osb);
@ -1413,6 +1725,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_fs_info = osb;
sb->s_op = &ocfs2_sops;
sb->s_export_op = &ocfs2_export_ops;
sb->s_qcop = &ocfs2_quotactl_ops;
sb->dq_op = &ocfs2_quota_operations;
sb->s_xattr = ocfs2_xattr_handlers;
sb->s_time_gran = 1;
sb->s_flags |= MS_NOATIME;
@ -1676,6 +1990,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
/* We have to do a raw check of the feature here */
if (le32_to_cpu(di->id2.i_super.s_feature_incompat) &
OCFS2_FEATURE_INCOMPAT_META_ECC) {
status = ocfs2_block_check_validate(bh->b_data,
bh->b_size,
&di->i_check);
if (status)
goto out;
}
status = -EINVAL;
if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
mlog(ML_ERROR, "found superblock with incorrect block "
@ -1717,6 +2040,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
}
}
out:
mlog_exit(status);
return status;
}

View file

@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
mlog_entry_void();
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
status = ocfs2_read_inode_block(inode, bh);
if (status < 0) {
mlog_errno(status);
link = ERR_PTR(status);

File diff suppressed because it is too large Load diff

View file

@ -30,13 +30,58 @@ enum ocfs2_xattr_type {
OCFS2_XATTR_MAX
};
struct ocfs2_security_xattr_info {
int enable;
char *name;
void *value;
size_t value_len;
};
extern struct xattr_handler ocfs2_xattr_user_handler;
extern struct xattr_handler ocfs2_xattr_trusted_handler;
extern struct xattr_handler ocfs2_xattr_security_handler;
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
extern struct xattr_handler ocfs2_xattr_acl_access_handler;
extern struct xattr_handler ocfs2_xattr_acl_default_handler;
#endif
extern struct xattr_handler *ocfs2_xattr_handlers[];
ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,
const char *, void *, size_t);
int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
size_t, int);
int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
int, const char *, const void *, size_t, int,
struct ocfs2_alloc_context *,
struct ocfs2_alloc_context *);
int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
int ocfs2_init_security_get(struct inode *, struct inode *,
struct ocfs2_security_xattr_info *);
int ocfs2_init_security_set(handle_t *, struct inode *,
struct buffer_head *,
struct ocfs2_security_xattr_info *,
struct ocfs2_alloc_context *,
struct ocfs2_alloc_context *);
int ocfs2_calc_security_init(struct inode *,
struct ocfs2_security_xattr_info *,
int *, int *, struct ocfs2_alloc_context **);
int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
int, struct ocfs2_security_xattr_info *,
int *, int *, struct ocfs2_alloc_context **);
/*
* xattrs can live inside an inode, as part of an external xattr block,
* or inside an xattr bucket, which is the leaf of a tree rooted in an
* xattr block. Some of the xattr calls, especially the value setting
* functions, want to treat each of these locations as equal. Let's wrap
* them in a structure that we can pass around instead of raw buffer_heads.
*/
struct ocfs2_xattr_value_buf {
struct buffer_head *vb_bh;
ocfs2_journal_access_func vb_access;
struct ocfs2_xattr_value_root *vb_xv;
};
#endif /* OCFS2_XATTR_H */

View file

@ -73,7 +73,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
case Q_SETQUOTA:
case Q_GETQUOTA:
/* This is just informative test so we are satisfied without a lock */
if (!sb_has_quota_enabled(sb, type))
if (!sb_has_quota_active(sb, type))
return -ESRCH;
}
@ -160,6 +160,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
int cnt;
sb->s_qcop->quota_sync(sb, type);
if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
return;
/* This is not very clever (and fast) but currently I don't know about
* any other simple way of getting quota data to disk and we must get
* them there for userspace to be visible... */
@ -175,7 +178,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
if (!sb_has_quota_enabled(sb, cnt))
if (!sb_has_quota_active(sb, cnt))
continue;
mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA);
truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
@ -201,7 +204,7 @@ void sync_dquots(struct super_block *sb, int type)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && type != cnt)
continue;
if (!sb_has_quota_enabled(sb, cnt))
if (!sb_has_quota_active(sb, cnt))
continue;
if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
@ -245,7 +248,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
__u32 fmt;
down_read(&sb_dqopt(sb)->dqptr_sem);
if (!sb_has_quota_enabled(sb, type)) {
if (!sb_has_quota_active(sb, type)) {
up_read(&sb_dqopt(sb)->dqptr_sem);
return -ESRCH;
}

645
fs/quota_tree.c Normal file
View file

@ -0,0 +1,645 @@
/*
* vfsv0 quota IO operations on file
*/
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/dqblk_v2.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/quotaops.h>
#include <asm/byteorder.h>
#include "quota_tree.h"
MODULE_AUTHOR("Jan Kara");
MODULE_DESCRIPTION("Quota trie support");
MODULE_LICENSE("GPL");
#define __QUOTA_QT_PARANOIA
typedef char *dqbuf_t;
static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
{
unsigned int epb = info->dqi_usable_bs >> 2;
depth = info->dqi_qtree_depth - depth - 1;
while (depth--)
id /= epb;
return id % epb;
}
/* Number of entries in one blocks */
static inline int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info)
{
return (info->dqi_usable_bs - sizeof(struct qt_disk_dqdbheader))
/ info->dqi_entry_size;
}
static dqbuf_t getdqbuf(size_t size)
{
dqbuf_t buf = kmalloc(size, GFP_NOFS);
if (!buf)
printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
return buf;
}
static inline void freedqbuf(dqbuf_t buf)
{
kfree(buf);
}
static inline ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
{
struct super_block *sb = info->dqi_sb;
memset(buf, 0, info->dqi_usable_bs);
return sb->s_op->quota_read(sb, info->dqi_type, (char *)buf,
info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
}
static inline ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
{
struct super_block *sb = info->dqi_sb;
return sb->s_op->quota_write(sb, info->dqi_type, (char *)buf,
info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
}
/* Remove empty block from list and return it */
static int get_free_dqblk(struct qtree_mem_dqinfo *info)
{
dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
int ret, blk;
if (!buf)
return -ENOMEM;
if (info->dqi_free_blk) {
blk = info->dqi_free_blk;
ret = read_blk(info, blk, buf);
if (ret < 0)
goto out_buf;
info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
}
else {
memset(buf, 0, info->dqi_usable_bs);
/* Assure block allocation... */
ret = write_blk(info, info->dqi_blocks, buf);
if (ret < 0)
goto out_buf;
blk = info->dqi_blocks++;
}
mark_info_dirty(info->dqi_sb, info->dqi_type);
ret = blk;
out_buf:
freedqbuf(buf);
return ret;
}
/* Insert empty block to the list */
static int put_free_dqblk(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
{
struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
int err;
dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
dh->dqdh_prev_free = cpu_to_le32(0);
dh->dqdh_entries = cpu_to_le16(0);
err = write_blk(info, blk, buf);
if (err < 0)
return err;
info->dqi_free_blk = blk;
mark_info_dirty(info->dqi_sb, info->dqi_type);
return 0;
}
/* Remove given block from the list of blocks with free entries */
static int remove_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
{
dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
uint nextblk = le32_to_cpu(dh->dqdh_next_free);
uint prevblk = le32_to_cpu(dh->dqdh_prev_free);
int err;
if (!tmpbuf)
return -ENOMEM;
if (nextblk) {
err = read_blk(info, nextblk, tmpbuf);
if (err < 0)
goto out_buf;
((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
dh->dqdh_prev_free;
err = write_blk(info, nextblk, tmpbuf);
if (err < 0)
goto out_buf;
}
if (prevblk) {
err = read_blk(info, prevblk, tmpbuf);
if (err < 0)
goto out_buf;
((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
dh->dqdh_next_free;
err = write_blk(info, prevblk, tmpbuf);
if (err < 0)
goto out_buf;
} else {
info->dqi_free_entry = nextblk;
mark_info_dirty(info->dqi_sb, info->dqi_type);
}
freedqbuf(tmpbuf);
dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
/* No matter whether write succeeds block is out of list */
if (write_blk(info, blk, buf) < 0)
printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
return 0;
out_buf:
freedqbuf(tmpbuf);
return err;
}
/* Insert given block to the beginning of list with free entries */
static int insert_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
{
dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
int err;
if (!tmpbuf)
return -ENOMEM;
dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
dh->dqdh_prev_free = cpu_to_le32(0);
err = write_blk(info, blk, buf);
if (err < 0)
goto out_buf;
if (info->dqi_free_entry) {
err = read_blk(info, info->dqi_free_entry, tmpbuf);
if (err < 0)
goto out_buf;
((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
cpu_to_le32(blk);
err = write_blk(info, info->dqi_free_entry, tmpbuf);
if (err < 0)
goto out_buf;
}
freedqbuf(tmpbuf);
info->dqi_free_entry = blk;
mark_info_dirty(info->dqi_sb, info->dqi_type);
return 0;
out_buf:
freedqbuf(tmpbuf);
return err;
}
/* Is the entry in the block free? */
int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk)
{
int i;
for (i = 0; i < info->dqi_entry_size; i++)
if (disk[i])
return 0;
return 1;
}
EXPORT_SYMBOL(qtree_entry_unused);
/* Find space for dquot */
static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
struct dquot *dquot, int *err)
{
uint blk, i;
struct qt_disk_dqdbheader *dh;
dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
char *ddquot;
*err = 0;
if (!buf) {
*err = -ENOMEM;
return 0;
}
dh = (struct qt_disk_dqdbheader *)buf;
if (info->dqi_free_entry) {
blk = info->dqi_free_entry;
*err = read_blk(info, blk, buf);
if (*err < 0)
goto out_buf;
} else {
blk = get_free_dqblk(info);
if ((int)blk < 0) {
*err = blk;
freedqbuf(buf);
return 0;
}
memset(buf, 0, info->dqi_usable_bs);
/* This is enough as block is already zeroed and entry list is empty... */
info->dqi_free_entry = blk;
mark_info_dirty(dquot->dq_sb, dquot->dq_type);
}
/* Block will be full? */
if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
*err = remove_free_dqentry(info, buf, blk);
if (*err < 0) {
printk(KERN_ERR "VFS: find_free_dqentry(): Can't "
"remove block (%u) from entry free list.\n",
blk);
goto out_buf;
}
}
le16_add_cpu(&dh->dqdh_entries, 1);
/* Find free structure in block */
for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
i < qtree_dqstr_in_blk(info) && !qtree_entry_unused(info, ddquot);
i++, ddquot += info->dqi_entry_size);
#ifdef __QUOTA_QT_PARANOIA
if (i == qtree_dqstr_in_blk(info)) {
printk(KERN_ERR "VFS: find_free_dqentry(): Data block full "
"but it shouldn't.\n");
*err = -EIO;
goto out_buf;
}
#endif
*err = write_blk(info, blk, buf);
if (*err < 0) {
printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
"data block %u.\n", blk);
goto out_buf;
}
dquot->dq_off = (blk << info->dqi_blocksize_bits) +
sizeof(struct qt_disk_dqdbheader) +
i * info->dqi_entry_size;
freedqbuf(buf);
return blk;
out_buf:
freedqbuf(buf);
return 0;
}
/* Insert reference to structure into the trie */
static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
uint *treeblk, int depth)
{
dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
int ret = 0, newson = 0, newact = 0;
__le32 *ref;
uint newblk;
if (!buf)
return -ENOMEM;
if (!*treeblk) {
ret = get_free_dqblk(info);
if (ret < 0)
goto out_buf;
*treeblk = ret;
memset(buf, 0, info->dqi_usable_bs);
newact = 1;
} else {
ret = read_blk(info, *treeblk, buf);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't read tree quota block "
"%u.\n", *treeblk);
goto out_buf;
}
}
ref = (__le32 *)buf;
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
if (!newblk)
newson = 1;
if (depth == info->dqi_qtree_depth - 1) {
#ifdef __QUOTA_QT_PARANOIA
if (newblk) {
printk(KERN_ERR "VFS: Inserting already present quota "
"entry (block %u).\n",
le32_to_cpu(ref[get_index(info,
dquot->dq_id, depth)]));
ret = -EIO;
goto out_buf;
}
#endif
newblk = find_free_dqentry(info, dquot, &ret);
} else {
ret = do_insert_tree(info, dquot, &newblk, depth+1);
}
if (newson && ret >= 0) {
ref[get_index(info, dquot->dq_id, depth)] =
cpu_to_le32(newblk);
ret = write_blk(info, *treeblk, buf);
} else if (newact && ret < 0) {
put_free_dqblk(info, buf, *treeblk);
}
out_buf:
freedqbuf(buf);
return ret;
}
/* Wrapper for inserting quota structure into tree */
static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
struct dquot *dquot)
{
int tmp = QT_TREEOFF;
return do_insert_tree(info, dquot, &tmp, 0);
}
/*
* We don't have to be afraid of deadlocks as we never have quotas on quota files...
*/
int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
{
int type = dquot->dq_type;
struct super_block *sb = dquot->dq_sb;
ssize_t ret;
dqbuf_t ddquot = getdqbuf(info->dqi_entry_size);
if (!ddquot)
return -ENOMEM;
/* dq_off is guarded by dqio_mutex */
if (!dquot->dq_off) {
ret = dq_insert_tree(info, dquot);
if (ret < 0) {
printk(KERN_ERR "VFS: Error %zd occurred while "
"creating quota.\n", ret);
freedqbuf(ddquot);
return ret;
}
}
spin_lock(&dq_data_lock);
info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
spin_unlock(&dq_data_lock);
ret = sb->s_op->quota_write(sb, type, (char *)ddquot,
info->dqi_entry_size, dquot->dq_off);
if (ret != info->dqi_entry_size) {
printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
sb->s_id);
if (ret >= 0)
ret = -ENOSPC;
} else {
ret = 0;
}
dqstats.writes++;
freedqbuf(ddquot);
return ret;
}
EXPORT_SYMBOL(qtree_write_dquot);
/* Free dquot entry in data block */
static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
uint blk)
{
struct qt_disk_dqdbheader *dh;
dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
int ret = 0;
if (!buf)
return -ENOMEM;
if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
printk(KERN_ERR "VFS: Quota structure has offset to other "
"block (%u) than it should (%u).\n", blk,
(uint)(dquot->dq_off >> info->dqi_blocksize_bits));
goto out_buf;
}
ret = read_blk(info, blk, buf);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
goto out_buf;
}
dh = (struct qt_disk_dqdbheader *)buf;
le16_add_cpu(&dh->dqdh_entries, -1);
if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
ret = remove_free_dqentry(info, buf, blk);
if (ret >= 0)
ret = put_free_dqblk(info, buf, blk);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't move quota data block (%u) "
"to free list.\n", blk);
goto out_buf;
}
} else {
memset(buf +
(dquot->dq_off & ((1 << info->dqi_blocksize_bits) - 1)),
0, info->dqi_entry_size);
if (le16_to_cpu(dh->dqdh_entries) ==
qtree_dqstr_in_blk(info) - 1) {
/* Insert will write block itself */
ret = insert_free_dqentry(info, buf, blk);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't insert quota data "
"block (%u) to free entry list.\n", blk);
goto out_buf;
}
} else {
ret = write_blk(info, blk, buf);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't write quota data "
"block %u\n", blk);
goto out_buf;
}
}
}
dquot->dq_off = 0; /* Quota is now unattached */
out_buf:
freedqbuf(buf);
return ret;
}
/* Remove reference to dquot from tree */
static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
uint *blk, int depth)
{
dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
int ret = 0;
uint newblk;
__le32 *ref = (__le32 *)buf;
if (!buf)
return -ENOMEM;
ret = read_blk(info, *blk, buf);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
goto out_buf;
}
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
if (depth == info->dqi_qtree_depth - 1) {
ret = free_dqentry(info, dquot, newblk);
newblk = 0;
} else {
ret = remove_tree(info, dquot, &newblk, depth+1);
}
if (ret >= 0 && !newblk) {
int i;
ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0);
/* Block got empty? */
for (i = 0;
i < (info->dqi_usable_bs >> 2) && !ref[i];
i++);
/* Don't put the root block into the free block list */
if (i == (info->dqi_usable_bs >> 2)
&& *blk != QT_TREEOFF) {
put_free_dqblk(info, buf, *blk);
*blk = 0;
} else {
ret = write_blk(info, *blk, buf);
if (ret < 0)
printk(KERN_ERR "VFS: Can't write quota tree "
"block %u.\n", *blk);
}
}
out_buf:
freedqbuf(buf);
return ret;
}
/* Delete dquot from tree */
int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
{
uint tmp = QT_TREEOFF;
if (!dquot->dq_off) /* Even not allocated? */
return 0;
return remove_tree(info, dquot, &tmp, 0);
}
EXPORT_SYMBOL(qtree_delete_dquot);
/* Find entry in block */
static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
struct dquot *dquot, uint blk)
{
dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
loff_t ret = 0;
int i;
char *ddquot;
if (!buf)
return -ENOMEM;
ret = read_blk(info, blk, buf);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
goto out_buf;
}
for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
i < qtree_dqstr_in_blk(info) && !info->dqi_ops->is_id(ddquot, dquot);
i++, ddquot += info->dqi_entry_size);
if (i == qtree_dqstr_in_blk(info)) {
printk(KERN_ERR "VFS: Quota for id %u referenced "
"but not present.\n", dquot->dq_id);
ret = -EIO;
goto out_buf;
} else {
ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
qt_disk_dqdbheader) + i * info->dqi_entry_size;
}
out_buf:
freedqbuf(buf);
return ret;
}
/* Find entry for given id in the tree */
static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
struct dquot *dquot, uint blk, int depth)
{
dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
loff_t ret = 0;
__le32 *ref = (__le32 *)buf;
if (!buf)
return -ENOMEM;
ret = read_blk(info, blk, buf);
if (ret < 0) {
printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
goto out_buf;
}
ret = 0;
blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
if (!blk) /* No reference? */
goto out_buf;
if (depth < info->dqi_qtree_depth - 1)
ret = find_tree_dqentry(info, dquot, blk, depth+1);
else
ret = find_block_dqentry(info, dquot, blk);
out_buf:
freedqbuf(buf);
return ret;
}
/* Find entry for given id in the tree - wrapper function */
static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
struct dquot *dquot)
{
return find_tree_dqentry(info, dquot, QT_TREEOFF, 0);
}
int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
{
int type = dquot->dq_type;
struct super_block *sb = dquot->dq_sb;
loff_t offset;
dqbuf_t ddquot;
int ret = 0;
#ifdef __QUOTA_QT_PARANOIA
/* Invalidated quota? */
if (!sb_dqopt(dquot->dq_sb)->files[type]) {
printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
return -EIO;
}
#endif
/* Do we know offset of the dquot entry in the quota file? */
if (!dquot->dq_off) {
offset = find_dqentry(info, dquot);
if (offset <= 0) { /* Entry not present? */
if (offset < 0)
printk(KERN_ERR "VFS: Can't read quota "
"structure for id %u.\n", dquot->dq_id);
dquot->dq_off = 0;
set_bit(DQ_FAKE_B, &dquot->dq_flags);
memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
ret = offset;
goto out;
}
dquot->dq_off = offset;
}
ddquot = getdqbuf(info->dqi_entry_size);
if (!ddquot)
return -ENOMEM;
ret = sb->s_op->quota_read(sb, type, (char *)ddquot,
info->dqi_entry_size, dquot->dq_off);
if (ret != info->dqi_entry_size) {
if (ret >= 0)
ret = -EIO;
printk(KERN_ERR "VFS: Error while reading quota "
"structure for id %u.\n", dquot->dq_id);
set_bit(DQ_FAKE_B, &dquot->dq_flags);
memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
freedqbuf(ddquot);
goto out;
}
spin_lock(&dq_data_lock);
info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
if (!dquot->dq_dqb.dqb_bhardlimit &&
!dquot->dq_dqb.dqb_bsoftlimit &&
!dquot->dq_dqb.dqb_ihardlimit &&
!dquot->dq_dqb.dqb_isoftlimit)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
spin_unlock(&dq_data_lock);
freedqbuf(ddquot);
out:
dqstats.reads++;
return ret;
}
EXPORT_SYMBOL(qtree_read_dquot);
/* Check whether dquot should not be deleted. We know we are
* the only one operating on dquot (thanks to dq_lock) */
int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
return qtree_delete_dquot(info, dquot);
return 0;
}
EXPORT_SYMBOL(qtree_release_dquot);

25
fs/quota_tree.h Normal file
View file

@ -0,0 +1,25 @@
/*
* Definitions of structures for vfsv0 quota format
*/
#ifndef _LINUX_QUOTA_TREE_H
#define _LINUX_QUOTA_TREE_H
#include <linux/types.h>
#include <linux/quota.h>
/*
* Structure of header of block with quota structures. It is padded to 16 bytes so
* there will be space for exactly 21 quota-entries in a block
*/
struct qt_disk_dqdbheader {
__le32 dqdh_next_free; /* Number of next block with free entry */
__le32 dqdh_prev_free; /* Number of previous block with free entry */
__le16 dqdh_entries; /* Number of valid entries in block */
__le16 dqdh_pad1;
__le32 dqdh_pad2;
};
#define QT_TREEOFF 1 /* Offset of tree in file in blocks */
#endif /* _LINUX_QUOTAIO_TREE_H */

View file

@ -3,25 +3,39 @@
#include <linux/quota.h>
#include <linux/quotaops.h>
#include <linux/dqblk_v1.h>
#include <linux/quotaio_v1.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <asm/byteorder.h>
#include "quotaio_v1.h"
MODULE_AUTHOR("Jan Kara");
MODULE_DESCRIPTION("Old quota format support");
MODULE_LICENSE("GPL");
#define QUOTABLOCK_BITS 10
#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
static inline qsize_t v1_stoqb(qsize_t space)
{
return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
}
static inline qsize_t v1_qbtos(qsize_t blocks)
{
return blocks << QUOTABLOCK_BITS;
}
static void v1_disk2mem_dqblk(struct mem_dqblk *m, struct v1_disk_dqblk *d)
{
m->dqb_ihardlimit = d->dqb_ihardlimit;
m->dqb_isoftlimit = d->dqb_isoftlimit;
m->dqb_curinodes = d->dqb_curinodes;
m->dqb_bhardlimit = d->dqb_bhardlimit;
m->dqb_bsoftlimit = d->dqb_bsoftlimit;
m->dqb_curspace = ((qsize_t)d->dqb_curblocks) << QUOTABLOCK_BITS;
m->dqb_bhardlimit = v1_qbtos(d->dqb_bhardlimit);
m->dqb_bsoftlimit = v1_qbtos(d->dqb_bsoftlimit);
m->dqb_curspace = v1_qbtos(d->dqb_curblocks);
m->dqb_itime = d->dqb_itime;
m->dqb_btime = d->dqb_btime;
}
@ -31,9 +45,9 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
d->dqb_ihardlimit = m->dqb_ihardlimit;
d->dqb_isoftlimit = m->dqb_isoftlimit;
d->dqb_curinodes = m->dqb_curinodes;
d->dqb_bhardlimit = m->dqb_bhardlimit;
d->dqb_bsoftlimit = m->dqb_bsoftlimit;
d->dqb_curblocks = toqb(m->dqb_curspace);
d->dqb_bhardlimit = v1_stoqb(m->dqb_bhardlimit);
d->dqb_bsoftlimit = v1_stoqb(m->dqb_bsoftlimit);
d->dqb_curblocks = v1_stoqb(m->dqb_curspace);
d->dqb_itime = m->dqb_itime;
d->dqb_btime = m->dqb_btime;
}

View file

@ -6,7 +6,6 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/dqblk_v2.h>
#include <linux/quotaio_v2.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@ -15,16 +14,37 @@
#include <asm/byteorder.h>
#include "quota_tree.h"
#include "quotaio_v2.h"
MODULE_AUTHOR("Jan Kara");
MODULE_DESCRIPTION("Quota format v2 support");
MODULE_LICENSE("GPL");
#define __QUOTA_V2_PARANOIA
typedef char *dqbuf_t;
static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
static void v2_disk2memdqb(struct dquot *dquot, void *dp);
static int v2_is_id(void *dp, struct dquot *dquot);
#define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
static struct qtree_fmt_operations v2_qtree_ops = {
.mem2disk_dqblk = v2_mem2diskdqb,
.disk2mem_dqblk = v2_disk2memdqb,
.is_id = v2_is_id,
};
#define QUOTABLOCK_BITS 10
#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
static inline qsize_t v2_stoqb(qsize_t space)
{
return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
}
static inline qsize_t v2_qbtos(qsize_t blocks)
{
return blocks << QUOTABLOCK_BITS;
}
/* Check whether given file is really vfsv0 quotafile */
static int v2_check_quota_file(struct super_block *sb, int type)
@ -50,7 +70,8 @@ static int v2_check_quota_file(struct super_block *sb, int type)
static int v2_read_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct qtree_mem_dqinfo *qinfo;
ssize_t size;
size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
@ -60,15 +81,29 @@ static int v2_read_file_info(struct super_block *sb, int type)
sb->s_id);
return -1;
}
info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
if (!info->dqi_priv) {
printk(KERN_WARNING
"Not enough memory for quota information structure.\n");
return -1;
}
qinfo = info->dqi_priv;
/* limits are stored as unsigned 32-bit data */
info->dqi_maxblimit = 0xffffffff;
info->dqi_maxilimit = 0xffffffff;
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
qinfo->dqi_sb = sb;
qinfo->dqi_type = type;
qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
qinfo->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
qinfo->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
qinfo->dqi_qtree_depth = qtree_depth(qinfo);
qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
qinfo->dqi_ops = &v2_qtree_ops;
return 0;
}
@ -76,7 +111,8 @@ static int v2_read_file_info(struct super_block *sb, int type)
static int v2_write_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
ssize_t size;
spin_lock(&dq_data_lock);
@ -85,9 +121,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
spin_unlock(&dq_data_lock);
dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
if (size != sizeof(struct v2_disk_dqinfo)) {
@ -98,574 +134,75 @@ static int v2_write_file_info(struct super_block *sb, int type)
return 0;
}
static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
static void v2_disk2memdqb(struct dquot *dquot, void *dp)
{
struct v2_disk_dqblk *d = dp, empty;
struct mem_dqblk *m = &dquot->dq_dqb;
m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
m->dqb_itime = le64_to_cpu(d->dqb_itime);
m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
m->dqb_bhardlimit = v2_qbtos(le32_to_cpu(d->dqb_bhardlimit));
m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit));
m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
m->dqb_btime = le64_to_cpu(d->dqb_btime);
/* We need to escape back all-zero structure */
memset(&empty, 0, sizeof(struct v2_disk_dqblk));
empty.dqb_itime = cpu_to_le64(1);
if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
m->dqb_itime = 0;
}
static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
{
struct v2_disk_dqblk *d = dp;
struct mem_dqblk *m = &dquot->dq_dqb;
struct qtree_mem_dqinfo *info =
sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
d->dqb_itime = cpu_to_le64(m->dqb_itime);
d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
d->dqb_bhardlimit = cpu_to_le32(v2_stoqb(m->dqb_bhardlimit));
d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
d->dqb_id = cpu_to_le32(id);
d->dqb_id = cpu_to_le32(dquot->dq_id);
if (qtree_entry_unused(info, dp))
d->dqb_itime = cpu_to_le64(1);
}
static dqbuf_t getdqbuf(void)
static int v2_is_id(void *dp, struct dquot *dquot)
{
dqbuf_t buf = kmalloc(V2_DQBLKSIZE, GFP_NOFS);
if (!buf)
printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
return buf;
}
struct v2_disk_dqblk *d = dp;
struct qtree_mem_dqinfo *info =
sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
static inline void freedqbuf(dqbuf_t buf)
{
kfree(buf);
}
static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
{
memset(buf, 0, V2_DQBLKSIZE);
return sb->s_op->quota_read(sb, type, (char *)buf,
V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
}
static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
{
return sb->s_op->quota_write(sb, type, (char *)buf,
V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
}
/* Remove empty block from list and return it */
static int get_free_dqblk(struct super_block *sb, int type)
{
dqbuf_t buf = getdqbuf();
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
int ret, blk;
if (!buf)
return -ENOMEM;
if (info->u.v2_i.dqi_free_blk) {
blk = info->u.v2_i.dqi_free_blk;
if ((ret = read_blk(sb, type, blk, buf)) < 0)
goto out_buf;
info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
}
else {
memset(buf, 0, V2_DQBLKSIZE);
/* Assure block allocation... */
if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
goto out_buf;
blk = info->u.v2_i.dqi_blocks++;
}
mark_info_dirty(sb, type);
ret = blk;
out_buf:
freedqbuf(buf);
return ret;
}
/* Insert empty block to the list */
static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
{
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
int err;
dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_blk);
dh->dqdh_prev_free = cpu_to_le32(0);
dh->dqdh_entries = cpu_to_le16(0);
info->u.v2_i.dqi_free_blk = blk;
mark_info_dirty(sb, type);
/* Some strange block. We had better leave it... */
if ((err = write_blk(sb, type, blk, buf)) < 0)
return err;
return 0;
}
/* Remove given block from the list of blocks with free entries */
static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
{
dqbuf_t tmpbuf = getdqbuf();
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
int err;
if (!tmpbuf)
return -ENOMEM;
if (nextblk) {
if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
goto out_buf;
((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
goto out_buf;
}
if (prevblk) {
if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
goto out_buf;
((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
goto out_buf;
}
else {
info->u.v2_i.dqi_free_entry = nextblk;
mark_info_dirty(sb, type);
}
freedqbuf(tmpbuf);
dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
/* No matter whether write succeeds block is out of list */
if (write_blk(sb, type, blk, buf) < 0)
printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
return 0;
out_buf:
freedqbuf(tmpbuf);
return err;
}
/* Insert given block to the beginning of list with free entries */
static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
{
dqbuf_t tmpbuf = getdqbuf();
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
int err;
if (!tmpbuf)
return -ENOMEM;
dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
dh->dqdh_prev_free = cpu_to_le32(0);
if ((err = write_blk(sb, type, blk, buf)) < 0)
goto out_buf;
if (info->u.v2_i.dqi_free_entry) {
if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
goto out_buf;
((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
goto out_buf;
}
freedqbuf(tmpbuf);
info->u.v2_i.dqi_free_entry = blk;
mark_info_dirty(sb, type);
return 0;
out_buf:
freedqbuf(tmpbuf);
return err;
}
/* Find space for dquot */
static uint find_free_dqentry(struct dquot *dquot, int *err)
{
struct super_block *sb = dquot->dq_sb;
struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
uint blk, i;
struct v2_disk_dqdbheader *dh;
struct v2_disk_dqblk *ddquot;
struct v2_disk_dqblk fakedquot;
dqbuf_t buf;
*err = 0;
if (!(buf = getdqbuf())) {
*err = -ENOMEM;
if (qtree_entry_unused(info, dp))
return 0;
}
dh = (struct v2_disk_dqdbheader *)buf;
ddquot = GETENTRIES(buf);
if (info->u.v2_i.dqi_free_entry) {
blk = info->u.v2_i.dqi_free_entry;
if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
goto out_buf;
}
else {
blk = get_free_dqblk(sb, dquot->dq_type);
if ((int)blk < 0) {
*err = blk;
freedqbuf(buf);
return 0;
}
memset(buf, 0, V2_DQBLKSIZE);
/* This is enough as block is already zeroed and entry list is empty... */
info->u.v2_i.dqi_free_entry = blk;
mark_info_dirty(sb, dquot->dq_type);
}
if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */
if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
goto out_buf;
}
le16_add_cpu(&dh->dqdh_entries, 1);
memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
/* Find free structure in block */
for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
#ifdef __QUOTA_V2_PARANOIA
if (i == V2_DQSTRINBLK) {
printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
*err = -EIO;
goto out_buf;
}
#endif
if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
goto out_buf;
}
dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
freedqbuf(buf);
return blk;
out_buf:
freedqbuf(buf);
return 0;
}
/* Insert reference to structure into the trie */
static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
{
struct super_block *sb = dquot->dq_sb;
dqbuf_t buf;
int ret = 0, newson = 0, newact = 0;
__le32 *ref;
uint newblk;
if (!(buf = getdqbuf()))
return -ENOMEM;
if (!*treeblk) {
ret = get_free_dqblk(sb, dquot->dq_type);
if (ret < 0)
goto out_buf;
*treeblk = ret;
memset(buf, 0, V2_DQBLKSIZE);
newact = 1;
}
else {
if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
goto out_buf;
}
}
ref = (__le32 *)buf;
newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
if (!newblk)
newson = 1;
if (depth == V2_DQTREEDEPTH-1) {
#ifdef __QUOTA_V2_PARANOIA
if (newblk) {
printk(KERN_ERR "VFS: Inserting already present quota entry (block %u).\n", le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]));
ret = -EIO;
goto out_buf;
}
#endif
newblk = find_free_dqentry(dquot, &ret);
}
else
ret = do_insert_tree(dquot, &newblk, depth+1);
if (newson && ret >= 0) {
ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
}
else if (newact && ret < 0)
put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
out_buf:
freedqbuf(buf);
return ret;
}
/* Wrapper for inserting quota structure into tree */
static inline int dq_insert_tree(struct dquot *dquot)
{
int tmp = V2_DQTREEOFF;
return do_insert_tree(dquot, &tmp, 0);
}
/*
* We don't have to be afraid of deadlocks as we never have quotas on quota files...
*/
static int v2_write_dquot(struct dquot *dquot)
{
int type = dquot->dq_type;
ssize_t ret;
struct v2_disk_dqblk ddquot, empty;
/* dq_off is guarded by dqio_mutex */
if (!dquot->dq_off)
if ((ret = dq_insert_tree(dquot)) < 0) {
printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
return ret;
}
spin_lock(&dq_data_lock);
mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
/* Argh... We may need to write structure full of zeroes but that would be
* treated as an empty place by the rest of the code. Format change would
* be definitely cleaner but the problems probably are not worth it */
memset(&empty, 0, sizeof(struct v2_disk_dqblk));
if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
ddquot.dqb_itime = cpu_to_le64(1);
spin_unlock(&dq_data_lock);
ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
(char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
if (ret != sizeof(struct v2_disk_dqblk)) {
printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
if (ret >= 0)
ret = -ENOSPC;
}
else
ret = 0;
dqstats.writes++;
return ret;
}
/* Free dquot entry in data block */
static int free_dqentry(struct dquot *dquot, uint blk)
{
struct super_block *sb = dquot->dq_sb;
int type = dquot->dq_type;
struct v2_disk_dqdbheader *dh;
dqbuf_t buf = getdqbuf();
int ret = 0;
if (!buf)
return -ENOMEM;
if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) {
printk(KERN_ERR "VFS: Quota structure has offset to other "
"block (%u) than it should (%u).\n", blk,
(uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
goto out_buf;
}
if ((ret = read_blk(sb, type, blk, buf)) < 0) {
printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
goto out_buf;
}
dh = (struct v2_disk_dqdbheader *)buf;
le16_add_cpu(&dh->dqdh_entries, -1);
if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
(ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
printk(KERN_ERR "VFS: Can't move quota data block (%u) "
"to free list.\n", blk);
goto out_buf;
}
}
else {
memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
sizeof(struct v2_disk_dqblk));
if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
/* Insert will write block itself */
if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
goto out_buf;
}
}
else
if ((ret = write_blk(sb, type, blk, buf)) < 0) {
printk(KERN_ERR "VFS: Can't write quota data "
"block %u\n", blk);
goto out_buf;
}
}
dquot->dq_off = 0; /* Quota is now unattached */
out_buf:
freedqbuf(buf);
return ret;
}
/* Remove reference to dquot from tree */
static int remove_tree(struct dquot *dquot, uint *blk, int depth)
{
struct super_block *sb = dquot->dq_sb;
int type = dquot->dq_type;
dqbuf_t buf = getdqbuf();
int ret = 0;
uint newblk;
__le32 *ref = (__le32 *)buf;
if (!buf)
return -ENOMEM;
if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
goto out_buf;
}
newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
if (depth == V2_DQTREEDEPTH-1) {
ret = free_dqentry(dquot, newblk);
newblk = 0;
}
else
ret = remove_tree(dquot, &newblk, depth+1);
if (ret >= 0 && !newblk) {
int i;
ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? */
/* Don't put the root block into the free block list */
if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) {
put_free_dqblk(sb, type, buf, *blk);
*blk = 0;
}
else
if ((ret = write_blk(sb, type, *blk, buf)) < 0)
printk(KERN_ERR "VFS: Can't write quota tree "
"block %u.\n", *blk);
}
out_buf:
freedqbuf(buf);
return ret;
}
/* Delete dquot from tree */
static int v2_delete_dquot(struct dquot *dquot)
{
uint tmp = V2_DQTREEOFF;
if (!dquot->dq_off) /* Even not allocated? */
return 0;
return remove_tree(dquot, &tmp, 0);
}
/* Find entry in block */
static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
{
dqbuf_t buf = getdqbuf();
loff_t ret = 0;
int i;
struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
if (!buf)
return -ENOMEM;
if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
goto out_buf;
}
if (dquot->dq_id)
for (i = 0; i < V2_DQSTRINBLK &&
le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
else { /* ID 0 as a bit more complicated searching... */
struct v2_disk_dqblk fakedquot;
memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
for (i = 0; i < V2_DQSTRINBLK; i++)
if (!le32_to_cpu(ddquot[i].dqb_id) &&
memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
break;
}
if (i == V2_DQSTRINBLK) {
printk(KERN_ERR "VFS: Quota for id %u referenced "
"but not present.\n", dquot->dq_id);
ret = -EIO;
goto out_buf;
}
else
ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
out_buf:
freedqbuf(buf);
return ret;
}
/* Find entry for given id in the tree */
static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
{
dqbuf_t buf = getdqbuf();
loff_t ret = 0;
__le32 *ref = (__le32 *)buf;
if (!buf)
return -ENOMEM;
if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
goto out_buf;
}
ret = 0;
blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
if (!blk) /* No reference? */
goto out_buf;
if (depth < V2_DQTREEDEPTH-1)
ret = find_tree_dqentry(dquot, blk, depth+1);
else
ret = find_block_dqentry(dquot, blk);
out_buf:
freedqbuf(buf);
return ret;
}
/* Find entry for given id in the tree - wrapper function */
static inline loff_t find_dqentry(struct dquot *dquot)
{
return find_tree_dqentry(dquot, V2_DQTREEOFF, 0);
return le32_to_cpu(d->dqb_id) == dquot->dq_id;
}
static int v2_read_dquot(struct dquot *dquot)
{
int type = dquot->dq_type;
loff_t offset;
struct v2_disk_dqblk ddquot, empty;
int ret = 0;
#ifdef __QUOTA_V2_PARANOIA
/* Invalidated quota? */
if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
return -EIO;
}
#endif
offset = find_dqentry(dquot);
if (offset <= 0) { /* Entry not present? */
if (offset < 0)
printk(KERN_ERR "VFS: Can't read quota "
"structure for id %u.\n", dquot->dq_id);
dquot->dq_off = 0;
set_bit(DQ_FAKE_B, &dquot->dq_flags);
memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
ret = offset;
}
else {
dquot->dq_off = offset;
if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
(char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
!= sizeof(struct v2_disk_dqblk)) {
if (ret >= 0)
ret = -EIO;
printk(KERN_ERR "VFS: Error while reading quota "
"structure for id %u.\n", dquot->dq_id);
memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
}
else {
ret = 0;
/* We need to escape back all-zero structure */
memset(&empty, 0, sizeof(struct v2_disk_dqblk));
empty.dqb_itime = cpu_to_le64(1);
if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
ddquot.dqb_itime = 0;
}
disk2memdqb(&dquot->dq_dqb, &ddquot);
if (!dquot->dq_dqb.dqb_bhardlimit &&
!dquot->dq_dqb.dqb_bsoftlimit &&
!dquot->dq_dqb.dqb_ihardlimit &&
!dquot->dq_dqb.dqb_isoftlimit)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
}
dqstats.reads++;
return ret;
return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
static int v2_write_dquot(struct dquot *dquot)
{
return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
/* Check whether dquot should not be deleted. We know we are
* the only one operating on dquot (thanks to dq_lock) */
static int v2_release_dquot(struct dquot *dquot)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
return v2_delete_dquot(dquot);
return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
static int v2_free_file_info(struct super_block *sb, int type)
{
kfree(sb_dqinfo(sb, type)->dqi_priv);
return 0;
}
@ -673,7 +210,7 @@ static struct quota_format_ops v2_format_ops = {
.check_quota_file = v2_check_quota_file,
.read_file_info = v2_read_file_info,
.write_file_info = v2_write_file_info,
.free_file_info = NULL,
.free_file_info = v2_free_file_info,
.read_dqblk = v2_read_dquot,
.commit_dqblk = v2_write_dquot,
.release_dqblk = v2_release_dquot,

View file

@ -21,6 +21,12 @@
0 /* GRPQUOTA */\
}
/* First generic header */
struct v2_disk_dqheader {
__le32 dqh_magic; /* Magic number identifying file */
__le32 dqh_version; /* File version */
};
/*
* The following structure defines the format of the disk quota file
* (as it appears on disk) - the file is a radix tree whose leaves point
@ -38,15 +44,6 @@ struct v2_disk_dqblk {
__le64 dqb_itime; /* time limit for excessive inode use */
};
/*
* Here are header structures as written on disk and their in-memory copies
*/
/* First generic header */
struct v2_disk_dqheader {
__le32 dqh_magic; /* Magic number identifying file */
__le32 dqh_version; /* File version */
};
/* Header with type and version specific information */
struct v2_disk_dqinfo {
__le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */
@ -57,23 +54,7 @@ struct v2_disk_dqinfo {
__le32 dqi_free_entry; /* Number of block with at least one free entry */
};
/*
* Structure of header of block with quota structures. It is padded to 16 bytes so
* there will be space for exactly 21 quota-entries in a block
*/
struct v2_disk_dqdbheader {
__le32 dqdh_next_free; /* Number of next block with free entry */
__le32 dqdh_prev_free; /* Number of previous block with free entry */
__le16 dqdh_entries; /* Number of valid entries in block */
__le16 dqdh_pad1;
__le32 dqdh_pad2;
};
#define V2_DQINFOOFF sizeof(struct v2_disk_dqheader) /* Offset of info header in file */
#define V2_DQBLKSIZE_BITS 10
#define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */
#define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */
#define V2_DQTREEDEPTH 4 /* Depth of quota tree */
#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one blocks */
#define V2_DQBLKSIZE_BITS 10 /* Size of leaf block in tree */
#endif /* _LINUX_QUOTAIO_V2_H */

View file

@ -649,6 +649,8 @@ static struct dquot_operations reiserfs_quota_operations = {
.release_dquot = reiserfs_release_dquot,
.mark_dirty = reiserfs_mark_dquot_dirty,
.write_info = reiserfs_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
static struct quotactl_ops reiserfs_qctl_operations = {
@ -994,8 +996,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
if (c == 'u' || c == 'g') {
int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
if ((sb_any_quota_enabled(s) ||
sb_any_quota_suspended(s)) &&
if (sb_any_quota_loaded(s) &&
(!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
reiserfs_warning(s,
"reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@ -1041,8 +1042,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
"reiserfs_parse_options: unknown quota format specified.");
return 0;
}
if ((sb_any_quota_enabled(s) ||
sb_any_quota_suspended(s)) &&
if (sb_any_quota_loaded(s) &&
*qfmt != REISERFS_SB(s)->s_jquota_fmt) {
reiserfs_warning(s,
"reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@ -1067,7 +1067,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
}
/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
if (!(*mount_options & (1 << REISERFS_QUOTA))
&& sb_any_quota_enabled(s)) {
&& sb_any_quota_loaded(s)) {
reiserfs_warning(s,
"reiserfs_parse_options: quota options must be present when quota is turned on.");
return 0;

View file

@ -56,8 +56,6 @@ header-y += dlm_device.h
header-y += dlm_netlink.h
header-y += dm-ioctl.h
header-y += dn.h
header-y += dqblk_v1.h
header-y += dqblk_v2.h
header-y += dqblk_xfs.h
header-y += efs_fs_sb.h
header-y += elf-fdpic.h
@ -134,8 +132,6 @@ header-y += posix_types.h
header-y += ppdev.h
header-y += prctl.h
header-y += qnxtypes.h
header-y += quotaio_v1.h
header-y += quotaio_v2.h
header-y += radeonfb.h
header-y += raw.h
header-y += resource.h

View file

@ -0,0 +1,56 @@
/*
* Definitions of structures and functions for quota formats using trie
*/
#ifndef _LINUX_DQBLK_QTREE_H
#define _LINUX_DQBLK_QTREE_H
#include <linux/types.h>
/* Numbers of blocks needed for updates - we count with the smallest
* possible block size (1024) */
#define QTREE_INIT_ALLOC 4
#define QTREE_INIT_REWRITE 2
#define QTREE_DEL_ALLOC 0
#define QTREE_DEL_REWRITE 6
struct dquot;
/* Operations */
struct qtree_fmt_operations {
void (*mem2disk_dqblk)(void *disk, struct dquot *dquot); /* Convert given entry from in memory format to disk one */
void (*disk2mem_dqblk)(struct dquot *dquot, void *disk); /* Convert given entry from disk format to in memory one */
int (*is_id)(void *disk, struct dquot *dquot); /* Is this structure for given id? */
};
/* Inmemory copy of version specific information */
struct qtree_mem_dqinfo {
struct super_block *dqi_sb; /* Sb quota is on */
int dqi_type; /* Quota type */
unsigned int dqi_blocks; /* # of blocks in quota file */
unsigned int dqi_free_blk; /* First block in list of free blocks */
unsigned int dqi_free_entry; /* First block with free entry */
unsigned int dqi_blocksize_bits; /* Block size of quota file */
unsigned int dqi_entry_size; /* Size of quota entry in quota file */
unsigned int dqi_usable_bs; /* Space usable in block for quota data */
unsigned int dqi_qtree_depth; /* Precomputed depth of quota tree */
struct qtree_fmt_operations *dqi_ops; /* Operations for entry manipulation */
};
int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk);
static inline int qtree_depth(struct qtree_mem_dqinfo *info)
{
unsigned int epb = info->dqi_usable_bs >> 2;
unsigned long long entries = epb;
int i;
for (i = 1; entries < (1ULL << 32); i++)
entries *= epb;
return i;
}
#endif /* _LINUX_DQBLK_QTREE_H */

View file

@ -5,9 +5,6 @@
#ifndef _LINUX_DQBLK_V1_H
#define _LINUX_DQBLK_V1_H
/* Id of quota format */
#define QFMT_VFS_OLD 1
/* Root squash turned on */
#define V1_DQF_RSQUASH 1
@ -17,8 +14,4 @@
#define V1_DEL_ALLOC 0
#define V1_DEL_REWRITE 2
/* Special information about quotafile */
struct v1_mem_dqinfo {
};
#endif /* _LINUX_DQBLK_V1_H */

View file

@ -1,26 +1,16 @@
/*
* Definitions of structures for vfsv0 quota format
* Definitions for vfsv0 quota format
*/
#ifndef _LINUX_DQBLK_V2_H
#define _LINUX_DQBLK_V2_H
#include <linux/types.h>
/* id numbers of quota format */
#define QFMT_VFS_V0 2
#include <linux/dqblk_qtree.h>
/* Numbers of blocks needed for updates */
#define V2_INIT_ALLOC 4
#define V2_INIT_REWRITE 2
#define V2_DEL_ALLOC 0
#define V2_DEL_REWRITE 6
/* Inmemory copy of version specific information */
struct v2_mem_dqinfo {
unsigned int dqi_blocks;
unsigned int dqi_free_blk;
unsigned int dqi_free_entry;
};
#define V2_INIT_ALLOC QTREE_INIT_ALLOC
#define V2_INIT_REWRITE QTREE_INIT_REWRITE
#define V2_DEL_ALLOC QTREE_DEL_ALLOC
#define V2_DEL_REWRITE QTREE_DEL_REWRITE
#endif /* _LINUX_DQBLK_V2_H */

View file

@ -329,6 +329,7 @@ enum jbd_state_bits {
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
BH_JBDPrivateStart, /* First bit available for private use by FS */
};
BUFFER_FNS(JBD, jbd)
@ -1007,6 +1008,35 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
/*
* Triggers
*/
struct jbd2_buffer_trigger_type {
/*
* Fired just before a buffer is written to the journal.
* mapped_data is a mapped buffer that is the frozen data for
* commit.
*/
void (*t_commit)(struct jbd2_buffer_trigger_type *type,
struct buffer_head *bh, void *mapped_data,
size_t size);
/*
* Fired during journal abort for dirty buffers that will not be
* committed.
*/
void (*t_abort)(struct jbd2_buffer_trigger_type *type,
struct buffer_head *bh);
};
extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
void *mapped_data,
struct jbd2_buffer_trigger_type *triggers);
extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
struct jbd2_buffer_trigger_type *triggers);
/* Buffer IO */
extern int
jbd2_journal_write_metadata_buffer(transaction_t *transaction,
@ -1045,6 +1075,8 @@ extern int jbd2_journal_extend (handle_t *, int nblocks);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
void jbd2_journal_set_triggers(struct buffer_head *,
struct jbd2_buffer_trigger_type *type);
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);

View file

@ -12,6 +12,8 @@
typedef unsigned int tid_t; /* Unique transaction ID */
typedef struct transaction_s transaction_t; /* Compound transaction type */
struct buffer_head;
struct journal_head {
@ -87,6 +89,12 @@ struct journal_head {
* [j_list_lock]
*/
struct journal_head *b_cpnext, *b_cpprev;
/* Trigger type */
struct jbd2_buffer_trigger_type *b_triggers;
/* Trigger type for the committing transaction's frozen data */
struct jbd2_buffer_trigger_type *b_frozen_triggers;
};
#endif /* JOURNAL_HEAD_H_INCLUDED */

View file

@ -36,17 +36,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#define __DQUOT_VERSION__ "dquot_6.5.1"
#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
/* Size of blocks in which are counted size limits */
#define QUOTABLOCK_BITS 10
#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
/* Conversion routines from and to quota blocks */
#define qb2kb(x) ((x) << (QUOTABLOCK_BITS-10))
#define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10))
#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
#define __DQUOT_VERSION__ "dquot_6.5.2"
#define MAXQUOTAS 2
#define USRQUOTA 0 /* element used for user quotas */
@ -80,16 +70,34 @@
#define Q_GETQUOTA 0x800007 /* get user quota structure */
#define Q_SETQUOTA 0x800008 /* set user quota structure */
/* Quota format type IDs */
#define QFMT_VFS_OLD 1
#define QFMT_VFS_V0 2
/* Size of block in which space limits are passed through the quota
* interface */
#define QIF_DQBLKSIZE_BITS 10
#define QIF_DQBLKSIZE (1 << QIF_DQBLKSIZE_BITS)
/*
* Quota structure used for communication with userspace via quotactl
* Following flags are used to specify which fields are valid
*/
#define QIF_BLIMITS 1
#define QIF_SPACE 2
#define QIF_ILIMITS 4
#define QIF_INODES 8
#define QIF_BTIME 16
#define QIF_ITIME 32
enum {
QIF_BLIMITS_B = 0,
QIF_SPACE_B,
QIF_ILIMITS_B,
QIF_INODES_B,
QIF_BTIME_B,
QIF_ITIME_B,
};
#define QIF_BLIMITS (1 << QIF_BLIMITS_B)
#define QIF_SPACE (1 << QIF_SPACE_B)
#define QIF_ILIMITS (1 << QIF_ILIMITS_B)
#define QIF_INODES (1 << QIF_INODES_B)
#define QIF_BTIME (1 << QIF_BTIME_B)
#define QIF_ITIME (1 << QIF_ITIME_B)
#define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS)
#define QIF_USAGE (QIF_SPACE | QIF_INODES)
#define QIF_TIMES (QIF_BTIME | QIF_ITIME)
@ -172,7 +180,7 @@ enum {
#include <asm/atomic.h>
typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
typedef __u64 qsize_t; /* Type in which we store sizes */
typedef long long qsize_t; /* Type in which we store sizes */
extern spinlock_t dq_data_lock;
@ -187,12 +195,12 @@ extern spinlock_t dq_data_lock;
* Data for one user/group kept in memory
*/
struct mem_dqblk {
__u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */
__u32 dqb_bsoftlimit; /* preferred limit on disk blks */
qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */
qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */
qsize_t dqb_curspace; /* current used space */
__u32 dqb_ihardlimit; /* absolute limit on allocated inodes */
__u32 dqb_isoftlimit; /* preferred inode limit */
__u32 dqb_curinodes; /* current # allocated inodes */
qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
qsize_t dqb_isoftlimit; /* preferred inode limit */
qsize_t dqb_curinodes; /* current # allocated inodes */
time_t dqb_btime; /* time limit for excessive disk use */
time_t dqb_itime; /* time limit for excessive inode use */
};
@ -212,10 +220,7 @@ struct mem_dqinfo {
unsigned int dqi_igrace;
qsize_t dqi_maxblimit;
qsize_t dqi_maxilimit;
union {
struct v1_mem_dqinfo v1_i;
struct v2_mem_dqinfo v2_i;
} u;
void *dqi_priv;
};
struct super_block;
@ -249,6 +254,11 @@ extern struct dqstats dqstats;
#define DQ_FAKE_B 3 /* no limits only usage */
#define DQ_READ_B 4 /* dquot was read into memory */
#define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */
#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\
* for the mask of entries set via SETQUOTA\
* quotactl. They are set under dq_data_lock\
* and the quota format handling dquot can\
* clear them when it sees fit. */
struct dquot {
struct hlist_node dq_hash; /* Hash list in memory */
@ -287,11 +297,13 @@ struct dquot_operations {
int (*initialize) (struct inode *, int);
int (*drop) (struct inode *);
int (*alloc_space) (struct inode *, qsize_t, int);
int (*alloc_inode) (const struct inode *, unsigned long);
int (*alloc_inode) (const struct inode *, qsize_t);
int (*free_space) (struct inode *, qsize_t);
int (*free_inode) (const struct inode *, unsigned long);
int (*free_inode) (const struct inode *, qsize_t);
int (*transfer) (struct inode *, struct iattr *);
int (*write_dquot) (struct dquot *); /* Ordinary dquot write */
struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */
void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */
int (*acquire_dquot) (struct dquot *); /* Quota is going to be created on disk */
int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */
int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */
@ -320,12 +332,42 @@ struct quota_format_type {
struct quota_format_type *qf_next;
};
#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */
#define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */
#define DQUOT_USR_SUSPENDED 0x04 /* User diskquotas are off, but
/* Quota state flags - they actually come in two flavors - for users and groups */
enum {
_DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */
_DQUOT_LIMITS_ENABLED, /* Enforce quota limits for users */
_DQUOT_SUSPENDED, /* User diskquotas are off, but
* we have necessary info in
* memory to turn them on */
#define DQUOT_GRP_SUSPENDED 0x08 /* The same for group quotas */
_DQUOT_STATE_FLAGS
};
#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED)
#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED)
#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED)
#define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
DQUOT_SUSPENDED)
/* Other quota flags */
#define DQUOT_QUOTA_SYS_FILE (1 << 6) /* Quota file is a special
* system file and user cannot
* touch it. Filesystem is
* responsible for setting
* S_NOQUOTA, S_NOATIME flags
*/
#define DQUOT_NEGATIVE_USAGE (1 << 7) /* Allow negative quota usage */
static inline unsigned int dquot_state_flag(unsigned int flags, int type)
{
if (type == USRQUOTA)
return flags;
return flags << _DQUOT_STATE_FLAGS;
}
static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
{
if (type == USRQUOTA)
return flags;
return flags >> _DQUOT_STATE_FLAGS;
}
struct quota_info {
unsigned int flags; /* Flags for diskquotas on this device */

View file

@ -24,12 +24,21 @@ void sync_dquots(struct super_block *sb, int type);
int dquot_initialize(struct inode *inode, int type);
int dquot_drop(struct inode *inode);
int dquot_drop_locked(struct inode *inode);
struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
void dqput(struct dquot *dquot);
int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
int dquot_scan_active(struct super_block *sb,
int (*fn)(struct dquot *dquot, unsigned long priv),
unsigned long priv);
struct dquot *dquot_alloc(struct super_block *sb, int type);
void dquot_destroy(struct dquot *dquot);
int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
int dquot_alloc_inode(const struct inode *inode, unsigned long number);
int dquot_alloc_inode(const struct inode *inode, qsize_t number);
int dquot_free_space(struct inode *inode, qsize_t number);
int dquot_free_inode(const struct inode *inode, unsigned long number);
int dquot_free_inode(const struct inode *inode, qsize_t number);
int dquot_transfer(struct inode *inode, struct iattr *iattr);
int dquot_commit(struct dquot *dquot);
@ -40,11 +49,14 @@ int dquot_mark_dquot_dirty(struct dquot *dquot);
int vfs_quota_on(struct super_block *sb, int type, int format_id,
char *path, int remount);
int vfs_quota_enable(struct inode *inode, int type, int format_id,
unsigned int flags);
int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
struct path *path);
int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
int format_id, int type);
int vfs_quota_off(struct super_block *sb, int type, int remount);
int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
int vfs_quota_sync(struct super_block *sb, int type);
int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
@ -64,24 +76,22 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
* Functions for checking status of quota
*/
static inline int sb_has_quota_enabled(struct super_block *sb, int type)
static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
{
if (type == USRQUOTA)
return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
return sb_dqopt(sb)->flags &
dquot_state_flag(DQUOT_USAGE_ENABLED, type);
}
static inline int sb_any_quota_enabled(struct super_block *sb)
static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
{
return sb_has_quota_enabled(sb, USRQUOTA) ||
sb_has_quota_enabled(sb, GRPQUOTA);
return sb_dqopt(sb)->flags &
dquot_state_flag(DQUOT_LIMITS_ENABLED, type);
}
static inline int sb_has_quota_suspended(struct super_block *sb, int type)
{
if (type == USRQUOTA)
return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
return sb_dqopt(sb)->flags &
dquot_state_flag(DQUOT_SUSPENDED, type);
}
static inline int sb_any_quota_suspended(struct super_block *sb)
@ -90,6 +100,31 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
sb_has_quota_suspended(sb, GRPQUOTA);
}
/* Does kernel know about any quota information for given sb + type? */
static inline int sb_has_quota_loaded(struct super_block *sb, int type)
{
/* Currently if anything is on, then quota usage is on as well */
return sb_has_quota_usage_enabled(sb, type);
}
static inline int sb_any_quota_loaded(struct super_block *sb)
{
return sb_has_quota_loaded(sb, USRQUOTA) ||
sb_has_quota_loaded(sb, GRPQUOTA);
}
static inline int sb_has_quota_active(struct super_block *sb, int type)
{
return sb_has_quota_loaded(sb, type) &&
!sb_has_quota_suspended(sb, type);
}
static inline int sb_any_quota_active(struct super_block *sb)
{
return sb_has_quota_active(sb, USRQUOTA) ||
sb_has_quota_active(sb, GRPQUOTA);
}
/*
* Operations supported for diskquotas.
*/
@ -104,7 +139,7 @@ extern struct quotactl_ops vfs_quotactl_ops;
static inline void vfs_dq_init(struct inode *inode)
{
BUG_ON(!inode->i_sb);
if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
inode->i_sb->dq_op->initialize(inode, -1);
}
@ -112,7 +147,7 @@ static inline void vfs_dq_init(struct inode *inode)
* a transaction (deadlocks possible otherwise) */
static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb)) {
if (sb_any_quota_active(inode->i_sb)) {
/* Used space is updated in alloc_space() */
if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
return 1;
@ -132,7 +167,7 @@ static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb)) {
if (sb_any_quota_active(inode->i_sb)) {
/* Used space is updated in alloc_space() */
if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
return 1;
@ -152,7 +187,7 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
static inline int vfs_dq_alloc_inode(struct inode *inode)
{
if (sb_any_quota_enabled(inode->i_sb)) {
if (sb_any_quota_active(inode->i_sb)) {
vfs_dq_init(inode);
if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
return 1;
@ -162,7 +197,7 @@ static inline int vfs_dq_alloc_inode(struct inode *inode)
static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb))
if (sb_any_quota_active(inode->i_sb))
inode->i_sb->dq_op->free_space(inode, nr);
else
inode_sub_bytes(inode, nr);
@ -176,7 +211,7 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
static inline void vfs_dq_free_inode(struct inode *inode)
{
if (sb_any_quota_enabled(inode->i_sb))
if (sb_any_quota_active(inode->i_sb))
inode->i_sb->dq_op->free_inode(inode, 1);
}
@ -197,12 +232,12 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
#else
static inline int sb_has_quota_enabled(struct super_block *sb, int type)
static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
{
return 0;
}
static inline int sb_any_quota_enabled(struct super_block *sb)
static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
{
return 0;
}
@ -217,6 +252,27 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
return 0;
}
/* Does kernel know about any quota information for given sb + type? */
static inline int sb_has_quota_loaded(struct super_block *sb, int type)
{
return 0;
}
static inline int sb_any_quota_loaded(struct super_block *sb)
{
return 0;
}
static inline int sb_has_quota_active(struct super_block *sb, int type)
{
return 0;
}
static inline int sb_any_quota_active(struct super_block *sb)
{
return 0;
}
/*
* NO-OP when quota not configured.
*/