IB/core: Add "type 2" memory windows support
This patch enhances the IB core support for Memory Windows (MWs).

MWs allow an application to have better and more flexible control over remote access to memory.

Two types of MWs are supported, with the second type having two flavors:

    Type 1  - associated with PD only
    Type 2A - associated with QPN only
    Type 2B - associated with PD and QPN

Applications can allocate a MW once, and then repeatedly bind the MW to different ranges in MRs that are associated with the same PD. Type 1 windows are bound through a verb, while type 2 windows are bound by posting a work request.

The 32-bit memory key is composed of a 24-bit index and an 8-bit key. The key is changed with each bind, thus allowing more control over the peer's use of the memory key.

The changes introduced are the following:

* add a memory window type enum and a corresponding parameter to ib_alloc_mw.
* add type 2 memory window bind work request support.
* move the part that is common to the bind verb struct ib_mw_bind and the bind work request into a single struct, ib_mw_bind_info.
* add the ib_inc_rkey helper function to advance the tag (key) part of an rkey.

Consumer interface details:

* new device capability flags IB_DEVICE_MEM_WINDOW_TYPE_2A and IB_DEVICE_MEM_WINDOW_TYPE_2B are added to indicate device support for these features.

  A device can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B memory windows, respectively. It can set neither to indicate that it does not support type 2 windows at all.

* modify existing provider and consumer code to use the new parameter of ib_alloc_mw and the ib_mw_bind_info structure.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
parent
836dc9e3fb
commit
7083e42ee2
10 changed files with 110 additions and 41 deletions
|
@ -1099,18 +1099,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list);
|
|||
|
||||
/* Memory windows */
|
||||
|
||||
struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
|
||||
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
|
||||
{
|
||||
struct ib_mw *mw;
|
||||
|
||||
if (!pd->device->alloc_mw)
|
||||
return ERR_PTR(-ENOSYS);
|
||||
|
||||
mw = pd->device->alloc_mw(pd);
|
||||
mw = pd->device->alloc_mw(pd, type);
|
||||
if (!IS_ERR(mw)) {
|
||||
mw->device = pd->device;
|
||||
mw->pd = pd;
|
||||
mw->uobject = NULL;
|
||||
mw->type = type;
|
||||
atomic_inc(&pd->usecnt);
|
||||
}
|
||||
|
||||
|
|
|
@ -738,7 +738,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
|
|||
return ibmr;
|
||||
}
|
||||
|
||||
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
|
||||
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
|
||||
{
|
||||
struct iwch_dev *rhp;
|
||||
struct iwch_pd *php;
|
||||
|
@ -747,6 +747,9 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
|
|||
u32 stag = 0;
|
||||
int ret;
|
||||
|
||||
if (type != IB_MW_TYPE_1)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
php = to_iwch_pd(pd);
|
||||
rhp = php->rhp;
|
||||
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
|
||||
|
|
|
@ -567,18 +567,19 @@ int iwch_bind_mw(struct ib_qp *qp,
|
|||
if (mw_bind->send_flags & IB_SEND_SIGNALED)
|
||||
t3_wr_flags = T3_COMPLETION_FLAG;
|
||||
|
||||
sgl.addr = mw_bind->addr;
|
||||
sgl.lkey = mw_bind->mr->lkey;
|
||||
sgl.length = mw_bind->length;
|
||||
sgl.addr = mw_bind->bind_info.addr;
|
||||
sgl.lkey = mw_bind->bind_info.mr->lkey;
|
||||
sgl.length = mw_bind->bind_info.length;
|
||||
wqe->bind.reserved = 0;
|
||||
wqe->bind.type = TPT_VATO;
|
||||
|
||||
/* TBD: check perms */
|
||||
wqe->bind.perms = iwch_ib_to_tpt_bind_access(mw_bind->mw_access_flags);
|
||||
wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
|
||||
wqe->bind.perms = iwch_ib_to_tpt_bind_access(
|
||||
mw_bind->bind_info.mw_access_flags);
|
||||
wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
|
||||
wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
|
||||
wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
|
||||
wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
|
||||
wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
|
||||
wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
|
||||
err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
|
||||
if (err) {
|
||||
spin_unlock_irqrestore(&qhp->lock, flag);
|
||||
|
|
|
@ -866,7 +866,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
|
|||
int page_list_len);
|
||||
struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
|
||||
int c4iw_dealloc_mw(struct ib_mw *mw);
|
||||
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd);
|
||||
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
|
||||
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
|
||||
u64 length, u64 virt, int acc,
|
||||
struct ib_udata *udata);
|
||||
|
|
|
@ -650,7 +650,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
|
||||
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
|
||||
{
|
||||
struct c4iw_dev *rhp;
|
||||
struct c4iw_pd *php;
|
||||
|
@ -659,6 +659,9 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
|
|||
u32 stag = 0;
|
||||
int ret;
|
||||
|
||||
if (type != IB_MW_TYPE_1)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
php = to_c4iw_pd(pd);
|
||||
rhp = php->rhp;
|
||||
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
|
||||
|
|
|
@ -95,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
|
|||
|
||||
int ehca_dereg_mr(struct ib_mr *mr);
|
||||
|
||||
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
|
||||
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
|
||||
|
||||
int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
|
||||
struct ib_mw_bind *mw_bind);
|
||||
|
|
|
@ -688,7 +688,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
|
|||
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
|
||||
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
|
||||
{
|
||||
struct ib_mw *ib_mw;
|
||||
u64 h_ret;
|
||||
|
@ -698,6 +698,9 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
|
|||
container_of(pd->device, struct ehca_shca, ib_device);
|
||||
struct ehca_mw_hipzout_parms hipzout;
|
||||
|
||||
if (type != IB_MW_TYPE_1)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
e_mw = ehca_mw_new();
|
||||
if (!e_mw) {
|
||||
ib_mw = ERR_PTR(-ENOMEM);
|
||||
|
|
|
@ -55,7 +55,8 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
|
|||
/**
|
||||
* nes_alloc_mw
|
||||
*/
|
||||
static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
|
||||
static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type)
|
||||
{
|
||||
struct nes_pd *nespd = to_nespd(ibpd);
|
||||
struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
|
||||
struct nes_device *nesdev = nesvnic->nesdev;
|
||||
|
@ -71,6 +72,9 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
|
|||
u32 driver_key = 0;
|
||||
u8 stag_key = 0;
|
||||
|
||||
if (type != IB_MW_TYPE_1)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
get_random_bytes(&next_stag_index, sizeof(next_stag_index));
|
||||
stag_key = (u8)next_stag_index;
|
||||
|
||||
|
@ -244,20 +248,19 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
|
|||
if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
|
||||
wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
|
||||
|
||||
if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) {
|
||||
if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
|
||||
wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
|
||||
}
|
||||
if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) {
|
||||
if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
|
||||
wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
|
||||
}
|
||||
|
||||
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
|
||||
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey);
|
||||
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
|
||||
ibmw_bind->bind_info.mr->lkey);
|
||||
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
|
||||
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
|
||||
ibmw_bind->length);
|
||||
ibmw_bind->bind_info.length);
|
||||
wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
|
||||
u64temp = (u64)ibmw_bind->addr;
|
||||
u64temp = (u64)ibmw_bind->bind_info.addr;
|
||||
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
|
||||
|
||||
head++;
|
||||
|
|
|
@ -115,6 +115,8 @@ enum ib_device_cap_flags {
|
|||
IB_DEVICE_XRC = (1<<20),
|
||||
IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
|
||||
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
|
||||
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
|
||||
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24)
|
||||
};
|
||||
|
||||
enum ib_atomic_cap {
|
||||
|
@ -715,6 +717,11 @@ enum ib_mig_state {
|
|||
IB_MIG_ARMED
|
||||
};
|
||||
|
||||
/**
 * enum ib_mw_type - Kind of memory window requested from ib_alloc_mw().
 * @IB_MW_TYPE_1: Type 1 window; bound through the bind verb.
 * @IB_MW_TYPE_2: Type 2 window; bound by posting a bind work request.
 *
 * The numeric values are assigned explicitly (1 and 2), so zero is not a
 * valid window type.
 */
enum ib_mw_type {
	IB_MW_TYPE_1 = 1,
	IB_MW_TYPE_2 = 2
};
|
||||
|
||||
struct ib_qp_attr {
|
||||
enum ib_qp_state qp_state;
|
||||
enum ib_qp_state cur_qp_state;
|
||||
|
@ -758,6 +765,7 @@ enum ib_wr_opcode {
|
|||
IB_WR_FAST_REG_MR,
|
||||
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
|
||||
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
|
||||
IB_WR_BIND_MW,
|
||||
};
|
||||
|
||||
enum ib_send_flags {
|
||||
|
@ -780,6 +788,23 @@ struct ib_fast_reg_page_list {
|
|||
unsigned int max_page_list_len;
|
||||
};
|
||||
|
||||
/**
 * struct ib_mw_bind_info - Parameters for a memory window bind operation.
 * @mr: A memory region to bind the memory window to.
 * @addr: The address where the memory window should begin.
 * @length: The length of the memory window, in bytes.
 * @mw_access_flags: Access flags from enum ib_access_flags for the window.
 *
 * This struct contains the shared parameters for type 1 and type 2
 * memory window bind operations.
 */
struct ib_mw_bind_info {
	struct ib_mr	*mr;
	u64		addr;
	u64		length;
	int		mw_access_flags;
};
|
||||
|
||||
struct ib_send_wr {
|
||||
struct ib_send_wr *next;
|
||||
u64 wr_id;
|
||||
|
@ -823,6 +848,12 @@ struct ib_send_wr {
|
|||
int access_flags;
|
||||
u32 rkey;
|
||||
} fast_reg;
|
||||
struct {
|
||||
struct ib_mw *mw;
|
||||
/* The new rkey for the memory window. */
|
||||
u32 rkey;
|
||||
struct ib_mw_bind_info bind_info;
|
||||
} bind_mw;
|
||||
} wr;
|
||||
u32 xrc_remote_srq_num; /* XRC TGT QPs only */
|
||||
};
|
||||
|
@ -839,7 +870,8 @@ enum ib_access_flags {
|
|||
IB_ACCESS_REMOTE_WRITE = (1<<1),
|
||||
IB_ACCESS_REMOTE_READ = (1<<2),
|
||||
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
|
||||
IB_ACCESS_MW_BIND = (1<<4)
|
||||
IB_ACCESS_MW_BIND = (1<<4),
|
||||
IB_ZERO_BASED = (1<<5)
|
||||
};
|
||||
|
||||
struct ib_phys_buf {
|
||||
|
@ -862,13 +894,16 @@ enum ib_mr_rereg_flags {
|
|||
IB_MR_REREG_ACCESS = (1<<2)
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
|
||||
* @wr_id: Work request id.
|
||||
* @send_flags: Flags from ib_send_flags enum.
|
||||
* @bind_info: More parameters of the bind operation.
|
||||
*/
|
||||
struct ib_mw_bind {
|
||||
struct ib_mr *mr;
|
||||
u64 wr_id;
|
||||
u64 addr;
|
||||
u32 length;
|
||||
int send_flags;
|
||||
int mw_access_flags;
|
||||
u64 wr_id;
|
||||
int send_flags;
|
||||
struct ib_mw_bind_info bind_info;
|
||||
};
|
||||
|
||||
struct ib_fmr_attr {
|
||||
|
@ -991,6 +1026,7 @@ struct ib_mw {
|
|||
struct ib_pd *pd;
|
||||
struct ib_uobject *uobject;
|
||||
u32 rkey;
|
||||
enum ib_mw_type type;
|
||||
};
|
||||
|
||||
struct ib_fmr {
|
||||
|
@ -1202,7 +1238,8 @@ struct ib_device {
|
|||
int num_phys_buf,
|
||||
int mr_access_flags,
|
||||
u64 *iova_start);
|
||||
struct ib_mw * (*alloc_mw)(struct ib_pd *pd);
|
||||
struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
|
||||
enum ib_mw_type type);
|
||||
int (*bind_mw)(struct ib_qp *qp,
|
||||
struct ib_mw *mw,
|
||||
struct ib_mw_bind *mw_bind);
|
||||
|
@ -2019,6 +2056,8 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
|
|||
* ib_dereg_mr - Deregisters a memory region and removes it from the
|
||||
* HCA translation table.
|
||||
* @mr: The memory region to deregister.
|
||||
*
|
||||
* This function can fail, if the memory region has memory windows bound to it.
|
||||
*/
|
||||
int ib_dereg_mr(struct ib_mr *mr);
|
||||
|
||||
|
@ -2070,11 +2109,23 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
|
|||
mr->rkey = (mr->rkey & 0xffffff00) | newkey;
|
||||
}
|
||||
|
||||
/**
 * ib_inc_rkey - Increment the key portion of the given rkey.
 * @rkey: The rkey to increment.
 *
 * Only the low 8 bits of @rkey are advanced; they wrap from 0xff back to
 * 0x00 without carrying into the upper 24 bits, which are passed through
 * unchanged. Can be used for calculating a new rkey for type 2 memory
 * windows.
 *
 * Return: The rkey with its low byte incremented modulo 256.
 */
static inline u32 ib_inc_rkey(u32 rkey)
{
	const u32 mask = 0x000000ff;

	/* Mask the sum so a carry out of the low byte is discarded. */
	return ((rkey + 1) & mask) | (rkey & ~mask);
}
|
||||
|
||||
/**
|
||||
* ib_alloc_mw - Allocates a memory window.
|
||||
* @pd: The protection domain associated with the memory window.
|
||||
* @type: The type of the memory window (1 or 2).
|
||||
*/
|
||||
struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
|
||||
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
|
||||
|
||||
/**
|
||||
* ib_bind_mw - Posts a work request to the send queue of the specified
|
||||
|
@ -2084,6 +2135,10 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
|
|||
* @mw: The memory window to bind.
|
||||
* @mw_bind: Specifies information about the memory window, including
|
||||
* its address range, remote access rights, and associated memory region.
|
||||
*
|
||||
* If there is no immediate error, the function will update the rkey member
|
||||
* of the mw parameter to its new value. The bind operation can still fail
|
||||
* asynchronously.
|
||||
*/
|
||||
static inline int ib_bind_mw(struct ib_qp *qp,
|
||||
struct ib_mw *mw,
|
||||
|
|
|
@ -1086,7 +1086,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
|
|||
case RPCRDMA_MEMWINDOWS:
|
||||
/* Allocate one extra request's worth, for full cycling */
|
||||
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
|
||||
r->r.mw = ib_alloc_mw(ia->ri_pd);
|
||||
r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1);
|
||||
if (IS_ERR(r->r.mw)) {
|
||||
rc = PTR_ERR(r->r.mw);
|
||||
dprintk("RPC: %s: ib_alloc_mw"
|
||||
|
@ -1673,12 +1673,12 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
|
|||
|
||||
*nsegs = 1;
|
||||
rpcrdma_map_one(ia, seg, writing);
|
||||
param.mr = ia->ri_bind_mem;
|
||||
param.bind_info.mr = ia->ri_bind_mem;
|
||||
param.wr_id = 0ULL; /* no send cookie */
|
||||
param.addr = seg->mr_dma;
|
||||
param.length = seg->mr_len;
|
||||
param.bind_info.addr = seg->mr_dma;
|
||||
param.bind_info.length = seg->mr_len;
|
||||
param.send_flags = 0;
|
||||
param.mw_access_flags = mem_priv;
|
||||
param.bind_info.mw_access_flags = mem_priv;
|
||||
|
||||
DECR_CQCOUNT(&r_xprt->rx_ep);
|
||||
rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m);
|
||||
|
@ -1690,7 +1690,7 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
|
|||
rpcrdma_unmap_one(ia, seg);
|
||||
} else {
|
||||
seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
|
||||
seg->mr_base = param.addr;
|
||||
seg->mr_base = param.bind_info.addr;
|
||||
seg->mr_nsegs = 1;
|
||||
}
|
||||
return rc;
|
||||
|
@ -1706,10 +1706,10 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
|
|||
int rc;
|
||||
|
||||
BUG_ON(seg->mr_nsegs != 1);
|
||||
param.mr = ia->ri_bind_mem;
|
||||
param.addr = 0ULL; /* unbind */
|
||||
param.length = 0;
|
||||
param.mw_access_flags = 0;
|
||||
param.bind_info.mr = ia->ri_bind_mem;
|
||||
param.bind_info.addr = 0ULL; /* unbind */
|
||||
param.bind_info.length = 0;
|
||||
param.bind_info.mw_access_flags = 0;
|
||||
if (*r) {
|
||||
param.wr_id = (u64) (unsigned long) *r;
|
||||
param.send_flags = IB_SEND_SIGNALED;
|
||||
|
|
Loading…
Reference in a new issue