[TCP] tcp_cubic: faster cube root
The Newton-Raphson method is quadratically convergent so only a small fixed number of steps are necessary. Therefore it is faster to unroll the loop. Since div64_64 is no longer inline it won't cause code explosion.

Also fixes a bug that can occur if x^2 was bigger than 32 bits.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
8570419fb7
commit
c5f5877c04
1 changed file with 5 additions and 11 deletions
|
@@ -96,23 +96,17 @@ static void bictcp_init(struct sock *sk)
|
||||||
*/
|
*/
|
||||||
static u32 cubic_root(u64 a)
|
static u32 cubic_root(u64 a)
|
||||||
{
|
{
|
||||||
u32 x, x1;
|
u32 x;
|
||||||
|
|
||||||
/* Initial estimate is based on:
|
/* Initial estimate is based on:
|
||||||
* cbrt(x) = exp(log(x) / 3)
|
* cbrt(x) = exp(log(x) / 3)
|
||||||
*/
|
*/
|
||||||
x = 1u << (fls64(a)/3);
|
x = 1u << (fls64(a)/3);
|
||||||
|
|
||||||
/*
|
/* converges to 32 bits in 3 iterations */
|
||||||
* Iteration based on:
|
x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
|
||||||
*
|
x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
|
||||||
* x_{k+1} = ( 2 * x_k + a / x_k^2 ) / 3
|
x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
|
||||||
*
|
|
||||||
*/
|
|
||||||
do {
|
|
||||||
x1 = x;
|
|
||||||
x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
|
|
||||||
} while (abs(x1 - x) > 1);
|
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue