chelsio: more rx speedup
Cleanup receive processing some more: * do the reserve padding of skb during setup * don't pass constants to get_packet * do smart prefetch of skb * make copybreak a module parameter Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: Jeff Garzik <jeff@garzik.org>
This commit is contained in:
parent
3de00b8958
commit
24a427cf76
1 changed files with 46 additions and 43 deletions
|
@ -71,12 +71,9 @@
|
|||
#define SGE_FREEL_REFILL_THRESH 16
|
||||
#define SGE_RESPQ_E_N 1024
|
||||
#define SGE_INTRTIMER_NRES 1000
|
||||
#define SGE_RX_COPY_THRES 256
|
||||
#define SGE_RX_SM_BUF_SIZE 1536
|
||||
#define SGE_TX_DESC_MAX_PLEN 16384
|
||||
|
||||
# define SGE_RX_DROP_THRES 2
|
||||
|
||||
#define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4)
|
||||
|
||||
/*
|
||||
|
@ -846,6 +843,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q)
|
|||
skb_reserve(skb, q->dma_offset);
|
||||
mapping = pci_map_single(pdev, skb->data, dma_len,
|
||||
PCI_DMA_FROMDEVICE);
|
||||
skb_reserve(skb, sge->rx_pkt_pad);
|
||||
|
||||
ce->skb = skb;
|
||||
pci_unmap_addr_set(ce, dma_addr, mapping);
|
||||
pci_unmap_len_set(ce, dma_len, dma_len);
|
||||
|
@ -1024,6 +1023,10 @@ static void recycle_fl_buf(struct freelQ *fl, int idx)
|
|||
}
|
||||
}
|
||||
|
||||
static int copybreak __read_mostly = 256;
|
||||
module_param(copybreak, int, 0);
|
||||
MODULE_PARM_DESC(copybreak, "Receive copy threshold");
|
||||
|
||||
/**
|
||||
* get_packet - return the next ingress packet buffer
|
||||
* @pdev: the PCI device that received the packet
|
||||
|
@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ *fl, int idx)
|
|||
* be copied but there is no memory for the copy.
|
||||
*/
|
||||
static inline struct sk_buff *get_packet(struct pci_dev *pdev,
|
||||
struct freelQ *fl, unsigned int len,
|
||||
int dma_pad, int skb_pad,
|
||||
unsigned int copy_thres,
|
||||
unsigned int drop_thres)
|
||||
struct freelQ *fl, unsigned int len)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
struct freelQ_ce *ce = &fl->centries[fl->cidx];
|
||||
const struct freelQ_ce *ce = &fl->centries[fl->cidx];
|
||||
|
||||
if (len < copy_thres) {
|
||||
skb = alloc_skb(len + skb_pad, GFP_ATOMIC);
|
||||
if (likely(skb != NULL)) {
|
||||
skb_reserve(skb, skb_pad);
|
||||
skb_put(skb, len);
|
||||
pci_dma_sync_single_for_cpu(pdev,
|
||||
pci_unmap_addr(ce, dma_addr),
|
||||
pci_unmap_len(ce, dma_len),
|
||||
PCI_DMA_FROMDEVICE);
|
||||
memcpy(skb->data, ce->skb->data + dma_pad, len);
|
||||
pci_dma_sync_single_for_device(pdev,
|
||||
pci_unmap_addr(ce, dma_addr),
|
||||
pci_unmap_len(ce, dma_len),
|
||||
PCI_DMA_FROMDEVICE);
|
||||
} else if (!drop_thres)
|
||||
if (len < copybreak) {
|
||||
skb = alloc_skb(len + 2, GFP_ATOMIC);
|
||||
if (!skb)
|
||||
goto use_orig_buf;
|
||||
|
||||
skb_reserve(skb, 2); /* align IP header */
|
||||
skb_put(skb, len);
|
||||
pci_dma_sync_single_for_cpu(pdev,
|
||||
pci_unmap_addr(ce, dma_addr),
|
||||
pci_unmap_len(ce, dma_len),
|
||||
PCI_DMA_FROMDEVICE);
|
||||
memcpy(skb->data, ce->skb->data, len);
|
||||
pci_dma_sync_single_for_device(pdev,
|
||||
pci_unmap_addr(ce, dma_addr),
|
||||
pci_unmap_len(ce, dma_len),
|
||||
PCI_DMA_FROMDEVICE);
|
||||
recycle_fl_buf(fl, fl->cidx);
|
||||
return skb;
|
||||
}
|
||||
|
||||
if (fl->credits < drop_thres) {
|
||||
use_orig_buf:
|
||||
if (fl->credits < 2) {
|
||||
recycle_fl_buf(fl, fl->cidx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
use_orig_buf:
|
||||
pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr),
|
||||
pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE);
|
||||
skb = ce->skb;
|
||||
skb_reserve(skb, dma_pad);
|
||||
prefetch(skb->data);
|
||||
|
||||
skb_put(skb, len);
|
||||
return skb;
|
||||
}
|
||||
|
@ -1359,27 +1359,25 @@ static void restart_sched(unsigned long arg)
|
|||
*
|
||||
* Process an ingress ethernet pakcet and deliver it to the stack.
|
||||
*/
|
||||
static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
|
||||
static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
struct cpl_rx_pkt *p;
|
||||
const struct cpl_rx_pkt *p;
|
||||
struct adapter *adapter = sge->adapter;
|
||||
struct sge_port_stats *st;
|
||||
|
||||
skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad,
|
||||
sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES,
|
||||
SGE_RX_DROP_THRES);
|
||||
skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad);
|
||||
if (unlikely(!skb)) {
|
||||
sge->stats.rx_drops++;
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
p = (struct cpl_rx_pkt *)skb->data;
|
||||
skb_pull(skb, sizeof(*p));
|
||||
p = (const struct cpl_rx_pkt *) skb->data;
|
||||
if (p->iff >= adapter->params.nports) {
|
||||
kfree_skb(skb);
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
__skb_pull(skb, sizeof(*p));
|
||||
|
||||
skb->dev = adapter->port[p->iff].dev;
|
||||
skb->dev->last_rx = jiffies;
|
||||
|
@ -1411,7 +1409,6 @@ static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
|
|||
netif_rx(skb);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1493,12 +1490,11 @@ static int process_responses(struct adapter *adapter, int budget)
|
|||
struct sge *sge = adapter->sge;
|
||||
struct respQ *q = &sge->respQ;
|
||||
struct respQ_e *e = &q->entries[q->cidx];
|
||||
int budget_left = budget;
|
||||
int done = 0;
|
||||
unsigned int flags = 0;
|
||||
unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0};
|
||||
|
||||
|
||||
while (likely(budget_left && e->GenerationBit == q->genbit)) {
|
||||
while (done < budget && e->GenerationBit == q->genbit) {
|
||||
flags |= e->Qsleeping;
|
||||
|
||||
cmdq_processed[0] += e->Cmdq0CreditReturn;
|
||||
|
@ -1508,14 +1504,16 @@ static int process_responses(struct adapter *adapter, int budget)
|
|||
* ping-pong of TX state information on MP where the sender
|
||||
* might run on a different CPU than this function...
|
||||
*/
|
||||
if (unlikely(flags & F_CMDQ0_ENABLE || cmdq_processed[0] > 64)) {
|
||||
if (unlikely((flags & F_CMDQ0_ENABLE) || cmdq_processed[0] > 64)) {
|
||||
flags = update_tx_info(adapter, flags, cmdq_processed[0]);
|
||||
cmdq_processed[0] = 0;
|
||||
}
|
||||
|
||||
if (unlikely(cmdq_processed[1] > 16)) {
|
||||
sge->cmdQ[1].processed += cmdq_processed[1];
|
||||
cmdq_processed[1] = 0;
|
||||
}
|
||||
|
||||
if (likely(e->DataValid)) {
|
||||
struct freelQ *fl = &sge->freelQ[e->FreelistQid];
|
||||
|
||||
|
@ -1525,12 +1523,16 @@ static int process_responses(struct adapter *adapter, int budget)
|
|||
else
|
||||
sge_rx(sge, fl, e->BufferLength);
|
||||
|
||||
++done;
|
||||
|
||||
/*
|
||||
* Note: this depends on each packet consuming a
|
||||
* single free-list buffer; cf. the BUG above.
|
||||
*/
|
||||
if (++fl->cidx == fl->size)
|
||||
fl->cidx = 0;
|
||||
prefetch(fl->centries[fl->cidx].skb);
|
||||
|
||||
if (unlikely(--fl->credits <
|
||||
fl->size - SGE_FREEL_REFILL_THRESH))
|
||||
refill_free_list(sge, fl);
|
||||
|
@ -1549,14 +1551,12 @@ static int process_responses(struct adapter *adapter, int budget)
|
|||
writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT);
|
||||
q->credits = 0;
|
||||
}
|
||||
--budget_left;
|
||||
}
|
||||
|
||||
flags = update_tx_info(adapter, flags, cmdq_processed[0]);
|
||||
sge->cmdQ[1].processed += cmdq_processed[1];
|
||||
|
||||
budget -= budget_left;
|
||||
return budget;
|
||||
return done;
|
||||
}
|
||||
|
||||
static inline int responses_pending(const struct adapter *adapter)
|
||||
|
@ -1581,11 +1581,14 @@ static int process_pure_responses(struct adapter *adapter)
|
|||
struct sge *sge = adapter->sge;
|
||||
struct respQ *q = &sge->respQ;
|
||||
struct respQ_e *e = &q->entries[q->cidx];
|
||||
const struct freelQ *fl = &sge->freelQ[e->FreelistQid];
|
||||
unsigned int flags = 0;
|
||||
unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0};
|
||||
|
||||
prefetch(fl->centries[fl->cidx].skb);
|
||||
if (e->DataValid)
|
||||
return 1;
|
||||
|
||||
do {
|
||||
flags |= e->Qsleeping;
|
||||
|
||||
|
|
Loading…
Reference in a new issue