30dc5e63d6
This patch adds iWARP Port Mapper (IWPM) Version 2 support. The iWARP Port Mapper implementation is based on the port mapper specification section in the Sockets Direct Protocol paper - http://www.rdmaconsortium.org/home/draft-pinkerton-iwarp-sdp-v1.0.pdf Existing iWARP RDMA providers use the same IP address as the native TCP/IP stack when creating RDMA connections. They need a mechanism to claim the TCP ports used for RDMA connections to prevent TCP port collisions when other host applications use TCP ports. The iWARP Port Mapper provides a standard mechanism to accomplish this. Without this service it is possible for an RDMA application to bind/listen on the same port which is already being used by a native TCP host application. If that happens the incoming TCP connection data can be passed to the RDMA stack with error. The iWARP Port Mapper solution doesn't contain any changes to the existing network stack in the kernel space. All the changes are contained within the infiniband tree and also in user space. The iWARP Port Mapper service is implemented as a user space daemon process. Source for the IWPM service is located at http://git.openfabrics.org/git?p=~tnikolova/libiwpm-1.0.0/.git;a=summary The iWARP driver (port mapper client) sends to the IWPM service the local IP address and TCP port it has received from the RDMA application, when starting a connection. The IWPM service performs a socket bind from user space to get an available TCP port, called a mapped port, and communicates it back to the client. In that sense, the IWPM service is used to map the TCP port, which the RDMA application uses, to any port available from the host TCP port space. The mapped ports are used in iWARP RDMA connections to avoid collisions with the native TCP stack, which is aware that these ports are taken. When an RDMA connection using a mapped port is terminated, the client notifies the IWPM service, which then releases the TCP port.
The message exchange between the IWPM service and the iWARP drivers (between user space and kernel space) is implemented using netlink sockets. 1) Netlink interface functions are added: ibnl_unicast() and ibnl_multicast() for sending netlink messages to user space 2) The signature of the existing ibnl_put_msg() is changed to be more generic 3) Two netlink clients are added: RDMA_NL_NES, RDMA_NL_C4IW corresponding to the two iWARP drivers - nes and cxgb4 which use the IWPM service 4) Enums are added to enumerate the attributes in the netlink messages, which are exchanged between the user space IWPM service and the iWARP drivers Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com> Signed-off-by: Steve Wise <swise@opengridcomputing.com> Reviewed-by: PJ Waskiewicz <pj.waskiewicz@solidfire.com> [ Fold in range checking fixes and nlh_next removal as suggested by Dan Carpenter and Steve Wise. Fix sparse endianness in hash. - Roland ] Signed-off-by: Roland Dreier <roland@purestorage.com>
216 lines
5.3 KiB
C
216 lines
5.3 KiB
C
/*
|
|
* Copyright (c) 2010 Voltaire Inc. All rights reserved.
|
|
*
|
|
* This software is available to you under a choice of one of two
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
* General Public License (GPL) Version 2, available from the file
|
|
* COPYING in the main directory of this source tree, or the
|
|
* OpenIB.org BSD license below:
|
|
*
|
|
* Redistribution and use in source and binary forms, with or
|
|
* without modification, are permitted provided that the following
|
|
* conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer.
|
|
*
|
|
* - Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer in the documentation and/or other materials
|
|
* provided with the distribution.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
|
|
|
|
#include <linux/export.h>
|
|
#include <net/netlink.h>
|
|
#include <net/net_namespace.h>
|
|
#include <net/sock.h>
|
|
#include <rdma/rdma_netlink.h>
|
|
|
|
struct ibnl_client {
|
|
struct list_head list;
|
|
int index;
|
|
int nops;
|
|
const struct ibnl_client_cbs *cb_table;
|
|
};
|
|
|
|
/*
 * ibnl_mutex is taken around every modification of client_list in this
 * file, and around netlink_rcv_skb() in ibnl_rcv().
 */
static DEFINE_MUTEX(ibnl_mutex);

/* Kernel netlink socket; created in ibnl_init(), released in ibnl_cleanup(). */
static struct sock *nls;

/* List of registered struct ibnl_client entries. */
static LIST_HEAD(client_list);
|
|
|
|
int ibnl_add_client(int index, int nops,
|
|
const struct ibnl_client_cbs cb_table[])
|
|
{
|
|
struct ibnl_client *cur;
|
|
struct ibnl_client *nl_client;
|
|
|
|
nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL);
|
|
if (!nl_client)
|
|
return -ENOMEM;
|
|
|
|
nl_client->index = index;
|
|
nl_client->nops = nops;
|
|
nl_client->cb_table = cb_table;
|
|
|
|
mutex_lock(&ibnl_mutex);
|
|
|
|
list_for_each_entry(cur, &client_list, list) {
|
|
if (cur->index == index) {
|
|
pr_warn("Client for %d already exists\n", index);
|
|
mutex_unlock(&ibnl_mutex);
|
|
kfree(nl_client);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
list_add_tail(&nl_client->list, &client_list);
|
|
|
|
mutex_unlock(&ibnl_mutex);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(ibnl_add_client);
|
|
|
|
int ibnl_remove_client(int index)
|
|
{
|
|
struct ibnl_client *cur, *next;
|
|
|
|
mutex_lock(&ibnl_mutex);
|
|
list_for_each_entry_safe(cur, next, &client_list, list) {
|
|
if (cur->index == index) {
|
|
list_del(&(cur->list));
|
|
mutex_unlock(&ibnl_mutex);
|
|
kfree(cur);
|
|
return 0;
|
|
}
|
|
}
|
|
pr_warn("Can't remove callback for client idx %d. Not found\n", index);
|
|
mutex_unlock(&ibnl_mutex);
|
|
|
|
return -EINVAL;
|
|
}
|
|
EXPORT_SYMBOL(ibnl_remove_client);
|
|
|
|
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
|
|
int len, int client, int op, int flags)
|
|
{
|
|
unsigned char *prev_tail;
|
|
|
|
prev_tail = skb_tail_pointer(skb);
|
|
*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
|
|
len, flags);
|
|
if (!*nlh)
|
|
goto out_nlmsg_trim;
|
|
(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
|
|
return nlmsg_data(*nlh);
|
|
|
|
out_nlmsg_trim:
|
|
nlmsg_trim(skb, prev_tail);
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL(ibnl_put_msg);
|
|
|
|
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
int len, void *data, int type)
|
|
{
|
|
unsigned char *prev_tail;
|
|
|
|
prev_tail = skb_tail_pointer(skb);
|
|
if (nla_put(skb, type, len, data))
|
|
goto nla_put_failure;
|
|
nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
|
|
return 0;
|
|
|
|
nla_put_failure:
|
|
nlmsg_trim(skb, prev_tail - nlh->nlmsg_len);
|
|
return -EMSGSIZE;
|
|
}
|
|
EXPORT_SYMBOL(ibnl_put_attr);
|
|
|
|
static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
|
{
|
|
struct ibnl_client *client;
|
|
int type = nlh->nlmsg_type;
|
|
int index = RDMA_NL_GET_CLIENT(type);
|
|
int op = RDMA_NL_GET_OP(type);
|
|
|
|
list_for_each_entry(client, &client_list, list) {
|
|
if (client->index == index) {
|
|
if (op < 0 || op >= client->nops ||
|
|
!client->cb_table[op].dump)
|
|
return -EINVAL;
|
|
|
|
{
|
|
struct netlink_dump_control c = {
|
|
.dump = client->cb_table[op].dump,
|
|
.module = client->cb_table[op].module,
|
|
};
|
|
return netlink_dump_start(nls, skb, nlh, &c);
|
|
}
|
|
}
|
|
}
|
|
|
|
pr_info("Index %d wasn't found in client list\n", index);
|
|
return -EINVAL;
|
|
}
|
|
|
|
static void ibnl_rcv(struct sk_buff *skb)
|
|
{
|
|
mutex_lock(&ibnl_mutex);
|
|
netlink_rcv_skb(skb, &ibnl_rcv_msg);
|
|
mutex_unlock(&ibnl_mutex);
|
|
}
|
|
|
|
int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
__u32 pid)
|
|
{
|
|
return nlmsg_unicast(nls, skb, pid);
|
|
}
|
|
EXPORT_SYMBOL(ibnl_unicast);
|
|
|
|
/*
 * ibnl_multicast - send @skb to a netlink multicast group on the RDMA socket.
 * @skb:   message buffer to send
 * @nlh:   not used by this function
 * @group: multicast group passed to nlmsg_multicast()
 * @flags: allocation flags passed to nlmsg_multicast()
 *
 * The third argument of nlmsg_multicast() is always 0 here (presumably
 * the port id to exclude - confirm against net/netlink.h).
 *
 * Returns whatever nlmsg_multicast() returns.
 */
int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
		   unsigned int group, gfp_t flags)
{
	int ret;

	ret = nlmsg_multicast(nls, skb, 0, group, flags);
	return ret;
}
EXPORT_SYMBOL(ibnl_multicast);
|
|
|
|
/*
 * ibnl_init - create the NETLINK_RDMA kernel socket.
 *
 * The socket is created in init_net with ibnl_rcv() as its input handler
 * and stored in nls.
 *
 * Returns 0 on success, -ENOMEM if the socket could not be created.
 */
int __init ibnl_init(void)
{
	struct netlink_kernel_cfg cfg = { .input = ibnl_rcv };

	nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
	if (nls)
		return 0;

	pr_warn("Failed to create netlink socket\n");
	return -ENOMEM;
}
|
|
|
|
void ibnl_cleanup(void)
|
|
{
|
|
struct ibnl_client *cur, *next;
|
|
|
|
mutex_lock(&ibnl_mutex);
|
|
list_for_each_entry_safe(cur, next, &client_list, list) {
|
|
list_del(&(cur->list));
|
|
kfree(cur);
|
|
}
|
|
mutex_unlock(&ibnl_mutex);
|
|
|
|
netlink_kernel_release(nls);
|
|
}
|