IB/mthca: Always fill MTTs from CPU
Speed up memory registration by filling in MTTs directly when the CPU can write directly to the whole table (all mem-free cards, and to Tavor mode on 64-bit systems with the patch I posted earlier). This reduces the number of FW commands needed to register an MR by at least a factor of 2 and speeds up memory registration significantly. Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
parent
c20e20ab0f
commit
b2875d4c39
3 changed files with 89 additions and 9 deletions
|
@ -464,6 +464,8 @@ void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
|
|||
int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
|
||||
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
|
||||
|
||||
int mthca_write_mtt_size(struct mthca_dev *dev);
|
||||
|
||||
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
|
||||
void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
|
||||
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
|
||||
|
|
|
@ -243,8 +243,8 @@ void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
|
|||
kfree(mtt);
|
||||
}
|
||||
|
||||
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
|
||||
int start_index, u64 *buffer_list, int list_len)
|
||||
static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
|
||||
int start_index, u64 *buffer_list, int list_len)
|
||||
{
|
||||
struct mthca_mailbox *mailbox;
|
||||
__be64 *mtt_entry;
|
||||
|
@ -295,6 +295,84 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
|
|||
return err;
|
||||
}
|
||||
|
||||
int mthca_write_mtt_size(struct mthca_dev *dev)
|
||||
{
|
||||
if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
|
||||
/*
|
||||
* Be friendly to WRITE_MTT command
|
||||
* and leave two empty slots for the
|
||||
* index and reserved fields of the
|
||||
* mailbox.
|
||||
*/
|
||||
return PAGE_SIZE / sizeof (u64) - 2;
|
||||
|
||||
/* For Arbel, all MTTs must fit in the same page. */
|
||||
return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
|
||||
}
|
||||
|
||||
/*
 * mthca_tavor_write_mtt_seg - store MTT entries through the Tavor FMR
 * MTT mapping.
 *
 * @dev:         device owning the MTT table.
 * @mtt:         MTT allocation; mtt->first_seg selects its first segment.
 * @start_index: index of the first entry to write, relative to @mtt.
 * @buffer_list: addresses to store, one per entry.
 * @list_len:    number of entries in @buffer_list.
 *
 * Each entry is buffer_list[i] with MTHCA_MTT_FLAG_PRESENT OR-ed in,
 * converted to big-endian and written with mthca_write64_raw().
 */
void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
			       int start_index, u64 *buffer_list, int list_len)
{
	u64 __iomem *slot;
	int n;

	/* Offsets added to mtt_base: segment offset, then entry offset. */
	slot = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE +
		start_index * sizeof (u64);

	for (n = 0; n < list_len; ++n, ++slot) {
		mthca_write64_raw(cpu_to_be64(buffer_list[n] | MTHCA_MTT_FLAG_PRESENT),
				  slot);
	}
}
|
||||
|
||||
/*
 * mthca_arbel_write_mtt_seg - store MTT entries directly into the MTT
 * table and sync them towards the device.
 *
 * @dev:         device owning the MTT table.
 * @mtt:         MTT allocation; mtt->first_seg selects its first segment.
 * @start_index: index of the first entry to write, relative to @mtt.
 * @buffer_list: addresses to store, one per entry.
 * @list_len:    number of entries in @buffer_list.
 *
 * BUGs if the written range crosses a PAGE_SIZE boundary, if the start
 * offset is not a multiple of MTHCA_MTT_SEG_SIZE, or if
 * mthca_table_find() returns NULL.
 */
void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
			       int start_index, u64 *buffer_list, int list_len)
{
	int s = start_index * sizeof (u64);	/* byte offset of first entry */
	dma_addr_t dma_handle;
	__be64 *entries;
	int n;

	/* For Arbel, all MTTs must fit in the same page. */
	BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);

	/* Only whole segments may be addressed. */
	BUG_ON(s % MTHCA_MTT_SEG_SIZE);

	entries = mthca_table_find(dev->mr_table.mtt_table,
				   mtt->first_seg + s / MTHCA_MTT_SEG_SIZE,
				   &dma_handle);
	BUG_ON(!entries);

	n = 0;
	while (n < list_len) {
		entries[n] = cpu_to_be64(buffer_list[n] | MTHCA_MTT_FLAG_PRESENT);
		++n;
	}

	/* Sync the freshly written entries in the DMA_TO_DEVICE direction. */
	dma_sync_single(&dev->pdev->dev, dma_handle,
			list_len * sizeof (u64), DMA_TO_DEVICE);
}
|
||||
|
||||
/*
 * mthca_write_mtt - write a list of addresses into an MTT allocation.
 *
 * @dev:         device owning the MTT table.
 * @mtt:         MTT allocation to fill.
 * @start_index: index of the first entry to write, relative to @mtt.
 * @buffer_list: addresses to store, one per entry.
 * @list_len:    number of entries in @buffer_list.
 *
 * When the FMR MTT buddy is not the main MTT buddy the request is
 * forwarded to __mthca_write_mtt() and its result is returned.
 * Otherwise the entries are written in chunks of at most
 * mthca_write_mtt_size() entries by the Arbel or Tavor segment writer,
 * and 0 is returned.
 */
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
		    int start_index, u64 *buffer_list, int list_len)
{
	int size = mthca_write_mtt_size(dev);
	int chunk;

	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) {
		return __mthca_write_mtt(dev, mtt, start_index,
					 buffer_list, list_len);
	}

	for (; list_len > 0;
	     list_len -= chunk, start_index += chunk, buffer_list += chunk) {
		/* Never hand a segment writer more than one chunk's worth. */
		chunk = size < list_len ? size : list_len;

		if (mthca_is_memfree(dev)) {
			mthca_arbel_write_mtt_seg(dev, mtt, start_index,
						  buffer_list, chunk);
		} else {
			mthca_tavor_write_mtt_seg(dev, mtt, start_index,
						  buffer_list, chunk);
		}
	}

	return 0;
}
|
||||
|
||||
static inline u32 tavor_hw_index_to_key(u32 ind)
|
||||
{
|
||||
return ind;
|
||||
|
|
|
@ -1015,6 +1015,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
|
|||
int shift, n, len;
|
||||
int i, j, k;
|
||||
int err = 0;
|
||||
int write_mtt_size;
|
||||
|
||||
shift = ffs(region->page_size) - 1;
|
||||
|
||||
|
@ -1040,6 +1041,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
|
|||
|
||||
i = n = 0;
|
||||
|
||||
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
|
||||
|
||||
list_for_each_entry(chunk, &region->chunk_list, list)
|
||||
for (j = 0; j < chunk->nmap; ++j) {
|
||||
len = sg_dma_len(&chunk->page_list[j]) >> shift;
|
||||
|
@ -1047,14 +1050,11 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
|
|||
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
|
||||
region->page_size * k;
|
||||
/*
|
||||
* Be friendly to WRITE_MTT command
|
||||
* and leave two empty slots for the
|
||||
* index and reserved fields of the
|
||||
* mailbox.
|
||||
* Be friendly to write_mtt and pass it chunks
|
||||
* of appropriate size.
|
||||
*/
|
||||
if (i == PAGE_SIZE / sizeof (u64) - 2) {
|
||||
err = mthca_write_mtt(dev, mr->mtt,
|
||||
n, pages, i);
|
||||
if (i == write_mtt_size) {
|
||||
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
|
||||
if (err)
|
||||
goto mtt_done;
|
||||
n += i;
|
||||
|
|
Loading…
Reference in a new issue