[AF_RXRPC]: Sort out MTU handling.
Sort out the MTU determination and handling in AF_RXRPC: (1) If it's present, parse the additional information supplied by the peer at the end of the ACK packet (struct ackinfo) to determine the MTU sizes that peer is willing to support. (2) Initialise the MTU size to that peer from the kernel's routing records. (3) Send ACKs rather than ACKALLs as the former carry the additional info, and the latter do not. (4) Declare the interface MTU size in outgoing ACKs as a maximum amount of data that can be stuffed into an RxRPC packet without it having to be fragmented to come in this computer's NIC. (5) If sendmsg() is given MSG_MORE then it should allocate an skb of the maximum size rather than one just big enough for the data it's got left to process on the theory that there is more data to come that it can append to that packet. This means, for example, that if AFS does a large StoreData op, all the packets barring the last will be filled to the maximum unfragmented size. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
da99f05654
commit
224711df5c
4 changed files with 115 additions and 14 deletions
|
@ -542,6 +542,38 @@ static void rxrpc_zap_tx_window(struct rxrpc_call *call)
|
||||||
kfree(acks_window);
|
kfree(acks_window);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* process the extra information that may be appended to an ACK packet
|
||||||
|
*/
|
||||||
|
static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
|
||||||
|
unsigned latest, int nAcks)
|
||||||
|
{
|
||||||
|
struct rxrpc_ackinfo ackinfo;
|
||||||
|
struct rxrpc_peer *peer;
|
||||||
|
unsigned mtu;
|
||||||
|
|
||||||
|
if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) {
|
||||||
|
_leave(" [no ackinfo]");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
|
||||||
|
latest,
|
||||||
|
ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU),
|
||||||
|
ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max));
|
||||||
|
|
||||||
|
mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));
|
||||||
|
|
||||||
|
peer = call->conn->trans->peer;
|
||||||
|
if (mtu < peer->maxdata) {
|
||||||
|
spin_lock_bh(&peer->lock);
|
||||||
|
peer->maxdata = mtu;
|
||||||
|
peer->mtu = mtu + peer->hdrsize;
|
||||||
|
spin_unlock_bh(&peer->lock);
|
||||||
|
_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* process packets in the reception queue
|
* process packets in the reception queue
|
||||||
*/
|
*/
|
||||||
|
@ -606,6 +638,8 @@ static int rxrpc_process_rx_queue(struct rxrpc_call *call,
|
||||||
rxrpc_acks[ack.reason],
|
rxrpc_acks[ack.reason],
|
||||||
ack.nAcks);
|
ack.nAcks);
|
||||||
|
|
||||||
|
rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);
|
||||||
|
|
||||||
if (ack.reason == RXRPC_ACK_PING) {
|
if (ack.reason == RXRPC_ACK_PING) {
|
||||||
_proto("Rx ACK %%%u PING Request", latest);
|
_proto("Rx ACK %%%u PING Request", latest);
|
||||||
rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
|
rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
|
||||||
|
@ -801,9 +835,9 @@ void rxrpc_process_call(struct work_struct *work)
|
||||||
struct msghdr msg;
|
struct msghdr msg;
|
||||||
struct kvec iov[5];
|
struct kvec iov[5];
|
||||||
unsigned long bits;
|
unsigned long bits;
|
||||||
__be32 data;
|
__be32 data, pad;
|
||||||
size_t len;
|
size_t len;
|
||||||
int genbit, loop, nbit, ioc, ret;
|
int genbit, loop, nbit, ioc, ret, mtu;
|
||||||
u32 abort_code = RX_PROTOCOL_ERROR;
|
u32 abort_code = RX_PROTOCOL_ERROR;
|
||||||
u8 *acks = NULL;
|
u8 *acks = NULL;
|
||||||
|
|
||||||
|
@ -899,9 +933,30 @@ void rxrpc_process_call(struct work_struct *work)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
|
if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
|
||||||
hdr.type = RXRPC_PACKET_TYPE_ACKALL;
|
|
||||||
genbit = RXRPC_CALL_ACK_FINAL;
|
genbit = RXRPC_CALL_ACK_FINAL;
|
||||||
goto send_message;
|
|
||||||
|
ack.bufferSpace = htons(8);
|
||||||
|
ack.maxSkew = 0;
|
||||||
|
ack.serial = 0;
|
||||||
|
ack.reason = RXRPC_ACK_IDLE;
|
||||||
|
ack.nAcks = 0;
|
||||||
|
call->ackr_reason = 0;
|
||||||
|
|
||||||
|
spin_lock_bh(&call->lock);
|
||||||
|
ack.serial = call->ackr_serial;
|
||||||
|
ack.previousPacket = call->ackr_prev_seq;
|
||||||
|
ack.firstPacket = htonl(call->rx_data_eaten + 1);
|
||||||
|
spin_unlock_bh(&call->lock);
|
||||||
|
|
||||||
|
pad = 0;
|
||||||
|
|
||||||
|
iov[1].iov_base = &ack;
|
||||||
|
iov[1].iov_len = sizeof(ack);
|
||||||
|
iov[2].iov_base = &pad;
|
||||||
|
iov[2].iov_len = 3;
|
||||||
|
iov[3].iov_base = &ackinfo;
|
||||||
|
iov[3].iov_len = sizeof(ackinfo);
|
||||||
|
goto send_ACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
|
if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
|
||||||
|
@ -971,8 +1026,6 @@ void rxrpc_process_call(struct work_struct *work)
|
||||||
|
|
||||||
/* consider sending an ordinary ACK */
|
/* consider sending an ordinary ACK */
|
||||||
if (test_bit(RXRPC_CALL_ACK, &call->events)) {
|
if (test_bit(RXRPC_CALL_ACK, &call->events)) {
|
||||||
__be32 pad;
|
|
||||||
|
|
||||||
_debug("send ACK: window: %d - %d { %lx }",
|
_debug("send ACK: window: %d - %d { %lx }",
|
||||||
call->rx_data_eaten, call->ackr_win_top,
|
call->rx_data_eaten, call->ackr_win_top,
|
||||||
call->ackr_window[0]);
|
call->ackr_window[0]);
|
||||||
|
@ -997,12 +1050,6 @@ void rxrpc_process_call(struct work_struct *work)
|
||||||
ack.serial = 0;
|
ack.serial = 0;
|
||||||
ack.reason = 0;
|
ack.reason = 0;
|
||||||
|
|
||||||
ackinfo.rxMTU = htonl(5692);
|
|
||||||
// ackinfo.rxMTU = htonl(call->conn->trans->peer->maxdata);
|
|
||||||
ackinfo.maxMTU = htonl(call->conn->trans->peer->maxdata);
|
|
||||||
ackinfo.rwind = htonl(32);
|
|
||||||
ackinfo.jumbo_max = htonl(4);
|
|
||||||
|
|
||||||
spin_lock_bh(&call->lock);
|
spin_lock_bh(&call->lock);
|
||||||
ack.reason = call->ackr_reason;
|
ack.reason = call->ackr_reason;
|
||||||
ack.serial = call->ackr_serial;
|
ack.serial = call->ackr_serial;
|
||||||
|
@ -1116,6 +1163,15 @@ void rxrpc_process_call(struct work_struct *work)
|
||||||
ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
|
ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
|
||||||
ntohl(ack.serial));
|
ntohl(ack.serial));
|
||||||
send_ACK:
|
send_ACK:
|
||||||
|
mtu = call->conn->trans->peer->if_mtu;
|
||||||
|
mtu -= call->conn->trans->peer->hdrsize;
|
||||||
|
ackinfo.maxMTU = htonl(mtu);
|
||||||
|
ackinfo.rwind = htonl(32);
|
||||||
|
|
||||||
|
/* permit the peer to send us jumbo packets if it wants to */
|
||||||
|
ackinfo.rxMTU = htonl(5692);
|
||||||
|
ackinfo.jumbo_max = htonl(4);
|
||||||
|
|
||||||
hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
|
hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
|
||||||
_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
|
_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
|
||||||
ntohl(hdr.serial),
|
ntohl(hdr.serial),
|
||||||
|
|
|
@ -100,8 +100,10 @@ void rxrpc_UDP_error_report(struct sock *sk)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mtu < peer->mtu) {
|
if (mtu < peer->mtu) {
|
||||||
|
spin_lock_bh(&peer->lock);
|
||||||
peer->mtu = mtu;
|
peer->mtu = mtu;
|
||||||
peer->maxdata = peer->mtu - peer->hdrsize;
|
peer->maxdata = peer->mtu - peer->hdrsize;
|
||||||
|
spin_unlock_bh(&peer->lock);
|
||||||
_net("Net MTU %u (maxdata %u)",
|
_net("Net MTU %u (maxdata %u)",
|
||||||
peer->mtu, peer->maxdata);
|
peer->mtu, peer->maxdata);
|
||||||
}
|
}
|
||||||
|
|
|
@ -582,7 +582,7 @@ static int rxrpc_send_data(struct kiocb *iocb,
|
||||||
max &= ~(call->conn->size_align - 1UL);
|
max &= ~(call->conn->size_align - 1UL);
|
||||||
|
|
||||||
chunk = max;
|
chunk = max;
|
||||||
if (chunk > len)
|
if (chunk > len && !more)
|
||||||
chunk = len;
|
chunk = len;
|
||||||
|
|
||||||
space = chunk + call->conn->size_align;
|
space = chunk + call->conn->size_align;
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
#include <net/af_rxrpc.h>
|
#include <net/af_rxrpc.h>
|
||||||
#include <net/ip.h>
|
#include <net/ip.h>
|
||||||
|
#include <net/route.h>
|
||||||
#include "ar-internal.h"
|
#include "ar-internal.h"
|
||||||
|
|
||||||
static LIST_HEAD(rxrpc_peers);
|
static LIST_HEAD(rxrpc_peers);
|
||||||
|
@ -27,6 +28,47 @@ static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq);
|
||||||
|
|
||||||
static void rxrpc_destroy_peer(struct work_struct *work);
|
static void rxrpc_destroy_peer(struct work_struct *work);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* assess the MTU size for the network interface through which this peer is
|
||||||
|
* reached
|
||||||
|
*/
|
||||||
|
static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
|
||||||
|
{
|
||||||
|
struct rtable *rt;
|
||||||
|
struct flowi fl;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
peer->if_mtu = 1500;
|
||||||
|
|
||||||
|
memset(&fl, 0, sizeof(fl));
|
||||||
|
|
||||||
|
switch (peer->srx.transport.family) {
|
||||||
|
case AF_INET:
|
||||||
|
fl.oif = 0;
|
||||||
|
fl.proto = IPPROTO_UDP,
|
||||||
|
fl.nl_u.ip4_u.saddr = 0;
|
||||||
|
fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr;
|
||||||
|
fl.nl_u.ip4_u.tos = 0;
|
||||||
|
/* assume AFS.CM talking to AFS.FS */
|
||||||
|
fl.uli_u.ports.sport = htons(7001);
|
||||||
|
fl.uli_u.ports.dport = htons(7000);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ip_route_output_key(&rt, &fl);
|
||||||
|
if (ret < 0) {
|
||||||
|
kleave(" [route err %d]", ret);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
peer->if_mtu = dst_mtu(&rt->u.dst);
|
||||||
|
dst_release(&rt->u.dst);
|
||||||
|
|
||||||
|
kleave(" [if_mtu %u]", peer->if_mtu);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* allocate a new peer
|
* allocate a new peer
|
||||||
*/
|
*/
|
||||||
|
@ -47,7 +89,8 @@ static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
|
||||||
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
|
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
|
||||||
memcpy(&peer->srx, srx, sizeof(*srx));
|
memcpy(&peer->srx, srx, sizeof(*srx));
|
||||||
|
|
||||||
peer->mtu = peer->if_mtu = 65535;
|
rxrpc_assess_MTU_size(peer);
|
||||||
|
peer->mtu = peer->if_mtu;
|
||||||
|
|
||||||
if (srx->transport.family == AF_INET) {
|
if (srx->transport.family == AF_INET) {
|
||||||
peer->hdrsize = sizeof(struct iphdr);
|
peer->hdrsize = sizeof(struct iphdr);
|
||||||
|
|
Loading…
Reference in a new issue