tipc: change socket buffer overflow control to respect sk_rcvbuf
As per feedback from the netdev community, we change the buffer overflow protection algorithm in receiving sockets so that it always respects the nominal upper limit set in sk_rcvbuf. Instead of scaling up from a small sk_rcvbuf value, which leads to violation of the configured sk_rcvbuf limit, we now calculate the weighted per-message limit by scaling down from a much bigger value, still in the same field, according to the importance priority of the received message. To allow for administrative tunability of the socket receive buffer size, we create a tipc_rmem sysctl variable to allow the user to configure an even bigger value via the sysctl command. It is a vector of three values (min/default/max), to be consistent with things like tcp_rmem. By default, the value initialized in tipc_rmem[1] is equal to the receive socket size needed by a TIPC_CRITICAL_IMPORTANCE message. This value is also set as the default value of sk_rcvbuf. Originally-by: Jon Maloy <jon.maloy@ericsson.com> Cc: Neil Horman <nhorman@tuxdriver.com> Cc: Jon Maloy <jon.maloy@ericsson.com> [Ying: added sysctl variation to Jon's original patch] Signed-off-by: Ying Xue <ying.xue@windriver.com> [PG: don't compile sysctl.c if not config'd; add Documentation] Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
8941bbcd57
commit
cc79dd1ba9
7 changed files with 112 additions and 12 deletions
|
@ -26,7 +26,7 @@ Table : Subdirectories in /proc/sys/net
|
|||
ipv4 IP version 4 x25 X.25 protocol
|
||||
ipx IPX token-ring IBM token ring
|
||||
bridge Bridging decnet DEC net
|
||||
ipv6 IP version 6
|
||||
ipv6 IP version 6 tipc TIPC
|
||||
..............................................................................
|
||||
|
||||
1. /proc/sys/net/core - Network core options
|
||||
|
@ -207,3 +207,18 @@ IPX.
|
|||
The /proc/net/ipx_route table holds a list of IPX routes. For each route it
|
||||
gives the destination network, the router node (or Directly) and the network
|
||||
address of the router (or Connected) for internal networks.
|
||||
|
||||
6. TIPC
|
||||
-------------------------------------------------------
|
||||
|
||||
The TIPC protocol now has a tunable for the receive memory, similar to
|
||||
tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
|
||||
|
||||
# cat /proc/sys/net/tipc/tipc_rmem
|
||||
4252725 34021800 68043600
|
||||
#
|
||||
|
||||
The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
|
||||
are scaled (shifted) versions of that same value. Note that the min value
|
||||
is not currently used in any meaningful way, but the triplet is
|
||||
preserved in order to be consistent with things like tcp_rmem.
|
||||
|
|
|
@ -11,3 +11,4 @@ tipc-y += addr.o bcast.o bearer.o config.o \
|
|||
socket.o log.o eth_media.o
|
||||
|
||||
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
|
||||
tipc-$(CONFIG_SYSCTL) += sysctl.o
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "name_table.h"
|
||||
#include "subscr.h"
|
||||
#include "config.h"
|
||||
#include "port.h"
|
||||
|
||||
#include <linux/module.h>
|
||||
|
||||
|
@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly;
|
|||
int tipc_max_ports __read_mostly;
|
||||
int tipc_net_id __read_mostly;
|
||||
int tipc_remote_management __read_mostly;
|
||||
|
||||
int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
|
||||
|
||||
/**
|
||||
* tipc_buf_acquire - creates a TIPC message buffer
|
||||
|
@ -118,6 +119,7 @@ static void tipc_core_stop(void)
|
|||
tipc_nametbl_stop();
|
||||
tipc_ref_table_stop();
|
||||
tipc_socket_stop();
|
||||
tipc_unregister_sysctl();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -142,13 +144,14 @@ static int tipc_core_start(void)
|
|||
res = tipc_netlink_start();
|
||||
if (!res)
|
||||
res = tipc_socket_init();
|
||||
if (!res)
|
||||
res = tipc_register_sysctl();
|
||||
if (res)
|
||||
tipc_core_stop();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static int __init tipc_init(void)
|
||||
{
|
||||
int res;
|
||||
|
@ -160,6 +163,11 @@ static int __init tipc_init(void)
|
|||
tipc_max_ports = CONFIG_TIPC_PORTS;
|
||||
tipc_net_id = 4711;
|
||||
|
||||
sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE;
|
||||
sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 <<
|
||||
TIPC_CRITICAL_IMPORTANCE;
|
||||
sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT;
|
||||
|
||||
res = tipc_core_start();
|
||||
if (res)
|
||||
pr_err("Unable to start in single node mode\n");
|
||||
|
|
|
@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly;
|
|||
extern int tipc_max_ports __read_mostly;
|
||||
extern int tipc_net_id __read_mostly;
|
||||
extern int tipc_remote_management __read_mostly;
|
||||
extern int sysctl_tipc_rmem[3] __read_mostly;
|
||||
|
||||
/*
|
||||
* Other global variables
|
||||
|
@ -97,6 +98,14 @@ extern void tipc_netlink_stop(void);
|
|||
extern int tipc_socket_init(void);
|
||||
extern void tipc_socket_stop(void);
|
||||
|
||||
/*
 * Sysctl registration hooks. With CONFIG_SYSCTL set these are real
 * functions; otherwise they are macros, so tipc_register_sysctl()
 * evaluates to 0 and tipc_unregister_sysctl() expands to nothing.
 */
#ifdef CONFIG_SYSCTL
|
||||
extern int tipc_register_sysctl(void);
|
||||
extern void tipc_unregister_sysctl(void);
|
||||
#else
|
||||
#define tipc_register_sysctl() 0
|
||||
#define tipc_unregister_sysctl()
|
||||
#endif
|
||||
|
||||
/*
|
||||
* TIPC timer and signal code
|
||||
*/
|
||||
|
|
|
@ -43,6 +43,8 @@
|
|||
#include "node_subscr.h"
|
||||
|
||||
#define TIPC_FLOW_CONTROL_WIN 512
|
||||
#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
|
||||
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
|
||||
|
||||
typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref,
|
||||
struct sk_buff **buf, unsigned char const *data,
|
||||
|
|
|
@ -43,8 +43,6 @@
|
|||
#define SS_LISTENING -1 /* socket is listening */
|
||||
#define SS_READY -2 /* socket is connectionless */
|
||||
|
||||
#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
|
||||
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
|
||||
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
|
||||
|
||||
struct tipc_sock {
|
||||
|
@ -203,6 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
|
|||
|
||||
sock_init_data(sock, sk);
|
||||
sk->sk_backlog_rcv = backlog_rcv;
|
||||
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
|
||||
sk->sk_data_ready = tipc_data_ready;
|
||||
sk->sk_write_space = tipc_write_space;
|
||||
tipc_sk(sk)->p = tp_ptr;
|
||||
|
@ -1233,10 +1232,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
|
|||
* For all connectionless messages, by default new queue limits are
|
||||
* as follows:
|
||||
*
|
||||
* TIPC_LOW_IMPORTANCE (5MB)
|
||||
* TIPC_MEDIUM_IMPORTANCE (10MB)
|
||||
* TIPC_HIGH_IMPORTANCE (20MB)
|
||||
* TIPC_CRITICAL_IMPORTANCE (40MB)
|
||||
* TIPC_LOW_IMPORTANCE (4 MB)
|
||||
* TIPC_MEDIUM_IMPORTANCE (8 MB)
|
||||
* TIPC_HIGH_IMPORTANCE (16 MB)
|
||||
* TIPC_CRITICAL_IMPORTANCE (32 MB)
|
||||
*
|
||||
* Returns overload limit according to corresponding message importance
|
||||
*/
|
||||
|
@ -1246,9 +1245,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
|
|||
unsigned int limit;
|
||||
|
||||
if (msg_connected(msg))
|
||||
limit = CONN_OVERLOAD_LIMIT;
|
||||
limit = sysctl_tipc_rmem[2];
|
||||
else
|
||||
limit = sk->sk_rcvbuf << (msg_importance(msg) + 5);
|
||||
limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
|
||||
msg_importance(msg);
|
||||
return limit;
|
||||
}
|
||||
|
||||
|
@ -1847,7 +1847,8 @@ static const struct net_proto_family tipc_family_ops = {
|
|||
static struct proto tipc_proto = {
|
||||
.name = "TIPC",
|
||||
.owner = THIS_MODULE,
|
||||
.obj_size = sizeof(struct tipc_sock)
|
||||
.obj_size = sizeof(struct tipc_sock),
|
||||
.sysctl_rmem = sysctl_tipc_rmem
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
64
net/tipc/sysctl.c
Normal file
64
net/tipc/sysctl.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* net/tipc/sysctl.c: sysctl interface to TIPC subsystem
|
||||
*
|
||||
* Copyright (c) 2013, Wind River Systems
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the names of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* Alternatively, this software may be distributed under the terms of the
|
||||
* GNU General Public License ("GPL") version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "core.h"
|
||||
|
||||
#include <linux/sysctl.h>
|
||||
|
||||
/* Header returned by register_net_sysctl(); kept so the table can be unregistered. */
static struct ctl_table_header *tipc_ctl_hdr;
|
||||
|
||||
/*
 * Sysctl table for TIPC. It holds a single entry, "tipc_rmem", backed by
 * the whole sysctl_tipc_rmem array, with mode 0644 and handled by
 * proc_dointvec. The trailing empty entry terminates the table.
 *
 * NOTE(review): no bounds are supplied with proc_dointvec here, so
 * presumably any int (including zero or negative) can be written --
 * confirm whether a range-checked handler is wanted.
 */
static struct ctl_table tipc_table[] = {
|
||||
{
|
||||
.procname = "tipc_rmem",
|
||||
.data = &sysctl_tipc_rmem,
|
||||
.maxlen = sizeof(sysctl_tipc_rmem),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
/**
 * tipc_register_sysctl - register the TIPC sysctl table
 *
 * Registers tipc_table under the "net/tipc" path for init_net and stores
 * the returned header in tipc_ctl_hdr.
 *
 * Returns 0 on success, or -ENOMEM if register_net_sysctl() returns NULL.
 */
int tipc_register_sysctl(void)
|
||||
{
|
||||
tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table);
|
||||
if (tipc_ctl_hdr == NULL)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * tipc_unregister_sysctl - unregister the TIPC sysctl table
 *
 * Passes the header saved in tipc_ctl_hdr to unregister_net_sysctl_table().
 * NOTE(review): tipc_ctl_hdr is not checked here and may still be NULL if
 * registration never succeeded -- confirm the callee tolerates that.
 */
void tipc_unregister_sysctl(void)
|
||||
{
|
||||
unregister_net_sysctl_table(tipc_ctl_hdr);
|
||||
}
|
Loading…
Reference in a new issue