[XFS] Introduce two new mount options (nolargeio/largeio) to allow
filesystems to expose the filesystem stripe width in stat(2) rather than the page cache size. This allows applications requiring high bandwidth to easily determine the optimum I/O size for the underlying filesystem. The default is to report the page cache size (i.e. "nolargeio"). SGI-PV: 942818 SGI-Modid: xfs-linux:xfs-kern:23830a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
This commit is contained in:
parent
ee34807a65
commit
e8c8b3a79d
6 changed files with 43 additions and 35 deletions
|
@ -189,7 +189,7 @@ xfs_revalidate_inode(
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
inode->i_blksize = PAGE_CACHE_SIZE;
|
inode->i_blksize = xfs_preferred_iosize(mp);
|
||||||
inode->i_generation = ip->i_d.di_gen;
|
inode->i_generation = ip->i_d.di_gen;
|
||||||
i_size_write(inode, ip->i_d.di_size);
|
i_size_write(inode, ip->i_d.di_size);
|
||||||
inode->i_blocks =
|
inode->i_blocks =
|
||||||
|
|
|
@ -124,6 +124,7 @@ vn_revalidate_core(
|
||||||
inode->i_mtime = vap->va_mtime;
|
inode->i_mtime = vap->va_mtime;
|
||||||
inode->i_ctime = vap->va_ctime;
|
inode->i_ctime = vap->va_ctime;
|
||||||
inode->i_atime = vap->va_atime;
|
inode->i_atime = vap->va_atime;
|
||||||
|
inode->i_blksize = vap->va_blocksize;
|
||||||
if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
|
if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
|
||||||
inode->i_flags |= S_IMMUTABLE;
|
inode->i_flags |= S_IMMUTABLE;
|
||||||
else
|
else
|
||||||
|
|
|
@ -106,5 +106,7 @@ struct xfs_mount_args {
|
||||||
#define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */
|
#define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */
|
||||||
#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
|
#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
|
||||||
* symlink,mkdir,rmdir,mknod */
|
* symlink,mkdir,rmdir,mknod */
|
||||||
|
#define XFSMNT_COMPAT_IOSIZE 0x80000000 /* don't report large preferred
|
||||||
|
* I/O size in stat() */
|
||||||
|
|
||||||
#endif /* __XFS_CLNT_H__ */
|
#endif /* __XFS_CLNT_H__ */
|
||||||
|
|
|
@ -421,6 +421,9 @@ typedef struct xfs_mount {
|
||||||
* allocation */
|
* allocation */
|
||||||
#define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */
|
#define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */
|
||||||
#define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */
|
#define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */
|
||||||
|
#define XFS_MOUNT_COMPAT_IOSIZE 0x00400000 /* don't report large preferred
|
||||||
|
* I/O size in stat() */
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Default minimum read and write sizes.
|
* Default minimum read and write sizes.
|
||||||
|
@ -442,6 +445,30 @@ typedef struct xfs_mount {
|
||||||
#define XFS_WSYNC_READIO_LOG 15 /* 32K */
|
#define XFS_WSYNC_READIO_LOG 15 /* 32K */
|
||||||
#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */
|
#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allow large block sizes to be reported to userspace programs if the
|
||||||
|
* "largeio" mount option is used.
|
||||||
|
*
|
||||||
|
* If compatibility mode is specified, simply return the basic unit of caching
|
||||||
|
* so that we don't get inefficient read/modify/write I/O from user apps.
|
||||||
|
* Otherwise....
|
||||||
|
*
|
||||||
|
* If the underlying volume is a stripe, then return the stripe width in bytes
|
||||||
|
* as the recommended I/O size. If it is not a stripe and we've set a default
|
||||||
|
* buffered I/O size, return that, otherwise return the compat default.
|
||||||
|
*/
|
||||||
|
static inline unsigned long
|
||||||
|
xfs_preferred_iosize(xfs_mount_t *mp)
|
||||||
|
{
|
||||||
|
if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)	/* compat mode: do not report a large preferred I/O size */
|
||||||
|
return PAGE_CACHE_SIZE;
|
||||||
|
return (mp->m_swidth ?	/* non-zero stripe width takes priority */
|
||||||
|
(mp->m_swidth << mp->m_sb.sb_blocklog) :	/* stripe width scaled by the superblock block-size log2 */
|
||||||
|
((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?	/* no stripe: is a default I/O size flagged on the mount? */
|
||||||
|
(1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) :	/* larger of the read/write I/O sizes (stored as log2) */
|
||||||
|
PAGE_CACHE_SIZE));	/* nothing configured: same value as compat mode */
|
||||||
|
}
|
||||||
|
|
||||||
#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
|
#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
|
||||||
|
|
||||||
#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
|
#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
|
||||||
|
|
|
@ -307,6 +307,9 @@ xfs_start_flags(
|
||||||
if (ap->flags & XFSMNT_DIRSYNC)
|
if (ap->flags & XFSMNT_DIRSYNC)
|
||||||
mp->m_flags |= XFS_MOUNT_DIRSYNC;
|
mp->m_flags |= XFS_MOUNT_DIRSYNC;
|
||||||
|
|
||||||
|
if (ap->flags & XFSMNT_COMPAT_IOSIZE)
|
||||||
|
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* no recovery flag requires a read-only mount
|
* no recovery flag requires a read-only mount
|
||||||
*/
|
*/
|
||||||
|
@ -1645,6 +1648,9 @@ xfs_vget(
|
||||||
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
|
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
|
||||||
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
|
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
|
||||||
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
|
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
|
||||||
|
#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
|
||||||
|
#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes
|
||||||
|
* in stat(). */
|
||||||
|
|
||||||
STATIC unsigned long
|
STATIC unsigned long
|
||||||
suffix_strtoul(const char *cp, char **endp, unsigned int base)
|
suffix_strtoul(const char *cp, char **endp, unsigned int base)
|
||||||
|
@ -1681,6 +1687,7 @@ xfs_parseargs(
|
||||||
int dsunit, dswidth, vol_dsunit, vol_dswidth;
|
int dsunit, dswidth, vol_dsunit, vol_dswidth;
|
||||||
int iosize;
|
int iosize;
|
||||||
|
|
||||||
|
args->flags |= XFSMNT_COMPAT_IOSIZE;
|
||||||
#if 0 /* XXX: off by default, until some remaining issues ironed out */
|
#if 0 /* XXX: off by default, until some remaining issues ironed out */
|
||||||
args->flags |= XFSMNT_IDELETE; /* default to on */
|
args->flags |= XFSMNT_IDELETE; /* default to on */
|
||||||
#endif
|
#endif
|
||||||
|
@ -1809,6 +1816,10 @@ xfs_parseargs(
|
||||||
args->flags &= ~XFSMNT_IDELETE;
|
args->flags &= ~XFSMNT_IDELETE;
|
||||||
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
|
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
|
||||||
args->flags |= XFSMNT_IDELETE;
|
args->flags |= XFSMNT_IDELETE;
|
||||||
|
} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
|
||||||
|
args->flags &= ~XFSMNT_COMPAT_IOSIZE;
|
||||||
|
} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
|
||||||
|
args->flags |= XFSMNT_COMPAT_IOSIZE;
|
||||||
} else if (!strcmp(this_char, "osyncisdsync")) {
|
} else if (!strcmp(this_char, "osyncisdsync")) {
|
||||||
/* no-op, this is now the default */
|
/* no-op, this is now the default */
|
||||||
printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
|
printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
|
||||||
|
|
|
@ -181,40 +181,7 @@ xfs_getattr(
|
||||||
vap->va_rdev = 0;
|
vap->va_rdev = 0;
|
||||||
|
|
||||||
if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
|
if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
|
||||||
|
vap->va_blocksize = xfs_preferred_iosize(mp);
|
||||||
#if 0
|
|
||||||
/* Large block sizes confuse various
|
|
||||||
* user space programs, so letting the
|
|
||||||
* stripe size through is not a good
|
|
||||||
* idea for now.
|
|
||||||
*/
|
|
||||||
vap->va_blocksize = mp->m_swidth ?
|
|
||||||
/*
|
|
||||||
* If the underlying volume is a stripe, then
|
|
||||||
* return the stripe width in bytes as the
|
|
||||||
* recommended I/O size.
|
|
||||||
*/
|
|
||||||
(mp->m_swidth << mp->m_sb.sb_blocklog) :
|
|
||||||
/*
|
|
||||||
* Return the largest of the preferred buffer
|
|
||||||
* sizes since doing small I/Os into larger
|
|
||||||
* buffers causes buffers to be decommissioned.
|
|
||||||
* The value returned is in bytes.
|
|
||||||
*/
|
|
||||||
(1 << (int)MAX(mp->m_readio_log,
|
|
||||||
mp->m_writeio_log));
|
|
||||||
|
|
||||||
#else
|
|
||||||
vap->va_blocksize =
|
|
||||||
/*
|
|
||||||
* Return the largest of the preferred buffer
|
|
||||||
* sizes since doing small I/Os into larger
|
|
||||||
* buffers causes buffers to be decommissioned.
|
|
||||||
* The value returned is in bytes.
|
|
||||||
*/
|
|
||||||
1 << (int)MAX(mp->m_readio_log,
|
|
||||||
mp->m_writeio_log);
|
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in a new issue