vfs: use 'unsigned long' accesses for dcache name comparison and hashing
Ok, this is hacky, and only works on little-endian machines with good unaligned handling. And even then only with CONFIG_DEBUG_PAGEALLOC disabled, since it can access up to 7 bytes after the pathname. But it runs like a bat out of hell. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
9f8050c4f9
commit
bfcfaa77bd
4 changed files with 150 additions and 0 deletions
|
@ -82,6 +82,7 @@ config X86
|
||||||
select CLKEVT_I8253
|
select CLKEVT_I8253
|
||||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||||
select GENERIC_IOMAP
|
select GENERIC_IOMAP
|
||||||
|
select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
|
||||||
|
|
||||||
config INSTRUCTION_DECODER
|
config INSTRUCTION_DECODER
|
||||||
def_bool (KPROBES || PERF_EVENTS)
|
def_bool (KPROBES || PERF_EVENTS)
|
||||||
|
|
|
@ -4,6 +4,10 @@
|
||||||
|
|
||||||
menu "File systems"
|
menu "File systems"
|
||||||
|
|
||||||
|
# Use unaligned word dcache accesses
|
||||||
|
config DCACHE_WORD_ACCESS
|
||||||
|
bool
|
||||||
|
|
||||||
if BLOCK
|
if BLOCK
|
||||||
|
|
||||||
source "fs/ext2/Kconfig"
|
source "fs/ext2/Kconfig"
|
||||||
|
|
23
fs/dcache.c
23
fs/dcache.c
|
@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
|
||||||
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
|
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
|
||||||
const unsigned char *ct, size_t tcount)
|
const unsigned char *ct, size_t tcount)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_DCACHE_WORD_ACCESS
|
||||||
|
unsigned long a,b,mask;
|
||||||
|
|
||||||
|
if (unlikely(scount != tcount))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
a = *(unsigned long *)cs;
|
||||||
|
b = *(unsigned long *)ct;
|
||||||
|
if (tcount < sizeof(unsigned long))
|
||||||
|
break;
|
||||||
|
if (unlikely(a != b))
|
||||||
|
return 1;
|
||||||
|
cs += sizeof(unsigned long);
|
||||||
|
ct += sizeof(unsigned long);
|
||||||
|
tcount -= sizeof(unsigned long);
|
||||||
|
if (!tcount)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
mask = ~(~0ul << tcount*8);
|
||||||
|
return unlikely(!!((a ^ b) & mask));
|
||||||
|
#else
|
||||||
if (scount != tcount)
|
if (scount != tcount)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
|
||||||
tcount--;
|
tcount--;
|
||||||
} while (tcount);
|
} while (tcount);
|
||||||
return 0;
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __d_free(struct rcu_head *head)
|
static void __d_free(struct rcu_head *head)
|
||||||
|
|
122
fs/namei.c
122
fs/namei.c
|
@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can do the critical dentry name comparison and hashing
|
||||||
|
* operations one word at a time, but we are limited to:
|
||||||
|
*
|
||||||
|
* - Architectures with fast unaligned word accesses. We could
|
||||||
|
* do a "get_unaligned()" if this helps and is sufficiently
|
||||||
|
* fast.
|
||||||
|
*
|
||||||
|
* - Little-endian machines (so that we can generate the mask
|
||||||
|
* of low bytes efficiently). Again, we *could* do a byte
|
||||||
|
* swapping load on big-endian architectures if that is not
|
||||||
|
* expensive enough to make the optimization worthless.
|
||||||
|
*
|
||||||
|
* - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
|
||||||
|
* do not trap on the (extremely unlikely) case of a page
|
||||||
|
* crossing operation.
|
||||||
|
*
|
||||||
|
* - Furthermore, we need an efficient 64-bit compile for the
|
||||||
|
* 64-bit case in order to generate the "number of bytes in
|
||||||
|
* the final mask". Again, that could be replaced with a
|
||||||
|
* efficient population count instruction or similar.
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_DCACHE_WORD_ACCESS
|
||||||
|
|
||||||
|
#ifdef CONFIG_64BIT

/*
 * Jan Achrenius on G+: microoptimized version of
 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
 * that works for the bytemasks without having to
 * mask them first.
 *
 * 'mask' is a low-byte mask (0, 0xff, 0xffff, ...); the result is
 * the number of 0xff bytes it contains.
 */
static inline long count_masked_bytes(unsigned long mask)
{
	return mask*0x0001020304050608ul >> 56;
}

/* Fold a word-sized hash down to an unsigned int by adding in the upper half. */
static inline unsigned int fold_hash(unsigned long hash)
{
	hash += hash >> (8*sizeof(int));
	return hash;
}

#else	/* 32-bit case */

/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}

/* The hash is already no wider than the word; nothing to fold. */
#define fold_hash(x) (x)

#endif

/*
 * Hash exactly 'len' bytes of 'name', one word at a time.
 *
 * The result must agree with what hash_name() computes for the same
 * bytes: every *complete* word is added and the running hash is then
 * multiplied by 9; the trailing partial word (if any) is masked down
 * to the remaining bytes and added without a further multiply.  In
 * particular a name whose length is an exact multiple of the word
 * size ends with a multiply, just as hash_name() does when the
 * terminator is the first byte of the next word.
 *
 * Note that the word load happens before the length check, so this
 * may read up to sizeof(unsigned long)-1 bytes past name+len; those
 * bytes are masked off and never affect the result.
 */
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
{
	unsigned long a, mask;
	unsigned long hash = 0;

	for (;;) {
		a = *(const unsigned long *)name;
		if (len < sizeof(unsigned long))
			break;
		/* Add first, then scale: same order as hash_name() */
		hash += a;
		hash *= 9;
		name += sizeof(unsigned long);
		len -= sizeof(unsigned long);
		if (!len)
			goto done;
	}
	/* Keep only the low 'len' bytes of the final, partial word */
	mask = ~(~0ul << len*8);
	hash += mask & a;
done:
	return fold_hash(hash);
}
|
||||||
|
EXPORT_SYMBOL(full_name_hash);
|
||||||
|
|
||||||
|
/*
 * Word-sized constants with one byte value repeated in every byte.
 * Deriving them from ~0ul keeps them exactly as wide as 'unsigned long'
 * on both the 64-bit and the 32-bit build of this code.
 */
#define REPEAT_BYTE(x)	((~0ul / 0xff) * (x))
#define ONEBYTES	REPEAT_BYTE(0x01)
#define SLASHBYTES	REPEAT_BYTE('/')
#define HIGHBITS	REPEAT_BYTE(0x80)

/*
 * Return non-zero iff some byte of 'a' is zero.
 *
 * The lowest set bit of the result is the high bit (0x80) of the
 * least-significant zero byte; bytes below it contribute nothing.
 * Bits above it may also be set because of borrow propagation, so
 * callers must only rely on the lowest set bit.
 */
static inline unsigned long has_zero(unsigned long a)
{
	return ((a - ONEBYTES) & ~a) & HIGHBITS;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calculate the length and hash of the path component, and
|
||||||
|
* return the length of the component;
|
||||||
|
*/
|
||||||
|
static inline unsigned long hash_name(const char *name, unsigned int *hashp)
|
||||||
|
{
|
||||||
|
unsigned long a, mask, hash, len;
|
||||||
|
|
||||||
|
hash = a = 0;
|
||||||
|
len = -sizeof(unsigned long);
|
||||||
|
do {
|
||||||
|
hash = (hash + a) * 9;
|
||||||
|
len += sizeof(unsigned long);
|
||||||
|
a = *(unsigned long *)(name+len);
|
||||||
|
/* Do we have any NUL or '/' bytes in this word? */
|
||||||
|
mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
|
||||||
|
} while (!mask);
|
||||||
|
|
||||||
|
/* The mask *below* the first high bit set */
|
||||||
|
mask = (mask - 1) & ~mask;
|
||||||
|
mask >>= 7;
|
||||||
|
hash += a & mask;
|
||||||
|
*hashp = fold_hash(hash);
|
||||||
|
|
||||||
|
return len + count_masked_bytes(mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
|
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
|
||||||
{
|
{
|
||||||
unsigned long hash = init_name_hash();
|
unsigned long hash = init_name_hash();
|
||||||
|
@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Name resolution.
|
* Name resolution.
|
||||||
* This is the basic name resolution function, turning a pathname into
|
* This is the basic name resolution function, turning a pathname into
|
||||||
|
|
Loading…
Reference in a new issue