fc458dcda2
remove too-low cap on minor number Signed-off-by: Ed L. Cashin <ecashin@coraid.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
630 lines
14 KiB
C
630 lines
14 KiB
C
/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
|
|
/*
|
|
* aoecmd.c
|
|
* Filesystem request handling methods
|
|
*/
|
|
|
|
#include <linux/hdreg.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/netdevice.h>
|
|
#include "aoe.h"
|
|
|
|
#define TIMERTICK (HZ / 10)
|
|
#define MINTIMER (2 * TIMERTICK)
|
|
#define MAXTIMER (HZ << 1)
|
|
#define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
|
|
|
|
static struct sk_buff *
|
|
new_skb(struct net_device *if_dev, ulong len)
|
|
{
|
|
struct sk_buff *skb;
|
|
|
|
skb = alloc_skb(len, GFP_ATOMIC);
|
|
if (skb) {
|
|
skb->nh.raw = skb->mac.raw = skb->data;
|
|
skb->dev = if_dev;
|
|
skb->protocol = __constant_htons(ETH_P_AOE);
|
|
skb->priority = 0;
|
|
skb_put(skb, len);
|
|
skb->next = skb->prev = NULL;
|
|
|
|
/* tell the network layer not to perform IP checksums
|
|
* or to get the NIC to do it
|
|
*/
|
|
skb->ip_summed = CHECKSUM_NONE;
|
|
}
|
|
return skb;
|
|
}
|
|
|
|
static struct sk_buff *
|
|
skb_prepare(struct aoedev *d, struct frame *f)
|
|
{
|
|
struct sk_buff *skb;
|
|
char *p;
|
|
|
|
skb = new_skb(d->ifp, f->ndata + f->writedatalen);
|
|
if (!skb) {
|
|
printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
|
|
return NULL;
|
|
}
|
|
|
|
p = skb->mac.raw;
|
|
memcpy(p, f->data, f->ndata);
|
|
|
|
if (f->writedatalen) {
|
|
p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
|
|
memcpy(p, f->bufaddr, f->writedatalen);
|
|
}
|
|
|
|
return skb;
|
|
}
|
|
|
|
static struct frame *
|
|
getframe(struct aoedev *d, int tag)
|
|
{
|
|
struct frame *f, *e;
|
|
|
|
f = d->frames;
|
|
e = f + d->nframes;
|
|
for (; f<e; f++)
|
|
if (f->tag == tag)
|
|
return f;
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Leave the top bit clear so we have tagspace for userland.
|
|
* The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
|
|
* This driver reserves tag -1 to mean "unused frame."
|
|
*/
|
|
static int
|
|
newtag(struct aoedev *d)
|
|
{
|
|
register ulong n;
|
|
|
|
n = jiffies & 0xffff;
|
|
return n |= (++d->lasttag & 0x7fff) << 16;
|
|
}
|
|
|
|
static int
|
|
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
|
|
{
|
|
u16 type = __constant_cpu_to_be16(ETH_P_AOE);
|
|
u16 aoemajor = __cpu_to_be16(d->aoemajor);
|
|
u32 host_tag = newtag(d);
|
|
u32 tag = __cpu_to_be32(host_tag);
|
|
|
|
memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
|
|
memcpy(h->dst, d->addr, sizeof h->dst);
|
|
memcpy(h->type, &type, sizeof type);
|
|
h->verfl = AOE_HVER;
|
|
memcpy(h->major, &aoemajor, sizeof aoemajor);
|
|
h->minor = d->aoeminor;
|
|
h->cmd = AOECMD_ATA;
|
|
memcpy(h->tag, &tag, sizeof tag);
|
|
|
|
return host_tag;
|
|
}
|
|
|
|
static void
|
|
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
|
|
{
|
|
struct aoe_hdr *h;
|
|
struct aoe_atahdr *ah;
|
|
struct buf *buf;
|
|
struct sk_buff *skb;
|
|
ulong bcnt;
|
|
register sector_t sector;
|
|
char writebit, extbit;
|
|
|
|
writebit = 0x10;
|
|
extbit = 0x4;
|
|
|
|
buf = d->inprocess;
|
|
|
|
sector = buf->sector;
|
|
bcnt = buf->bv_resid;
|
|
if (bcnt > MAXATADATA)
|
|
bcnt = MAXATADATA;
|
|
|
|
/* initialize the headers & frame */
|
|
h = (struct aoe_hdr *) f->data;
|
|
ah = (struct aoe_atahdr *) (h+1);
|
|
f->ndata = sizeof *h + sizeof *ah;
|
|
memset(h, 0, f->ndata);
|
|
f->tag = aoehdr_atainit(d, h);
|
|
f->waited = 0;
|
|
f->buf = buf;
|
|
f->bufaddr = buf->bufaddr;
|
|
|
|
/* set up ata header */
|
|
ah->scnt = bcnt >> 9;
|
|
ah->lba0 = sector;
|
|
ah->lba1 = sector >>= 8;
|
|
ah->lba2 = sector >>= 8;
|
|
ah->lba3 = sector >>= 8;
|
|
if (d->flags & DEVFL_EXT) {
|
|
ah->aflags |= AOEAFL_EXT;
|
|
ah->lba4 = sector >>= 8;
|
|
ah->lba5 = sector >>= 8;
|
|
} else {
|
|
extbit = 0;
|
|
ah->lba3 &= 0x0f;
|
|
ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
|
|
}
|
|
|
|
if (bio_data_dir(buf->bio) == WRITE) {
|
|
ah->aflags |= AOEAFL_WRITE;
|
|
f->writedatalen = bcnt;
|
|
} else {
|
|
writebit = 0;
|
|
f->writedatalen = 0;
|
|
}
|
|
|
|
ah->cmdstat = WIN_READ | writebit | extbit;
|
|
|
|
/* mark all tracking fields and load out */
|
|
buf->nframesout += 1;
|
|
buf->bufaddr += bcnt;
|
|
buf->bv_resid -= bcnt;
|
|
/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
|
|
buf->resid -= bcnt;
|
|
buf->sector += bcnt >> 9;
|
|
if (buf->resid == 0) {
|
|
d->inprocess = NULL;
|
|
} else if (buf->bv_resid == 0) {
|
|
buf->bv++;
|
|
buf->bv_resid = buf->bv->bv_len;
|
|
buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
|
|
}
|
|
|
|
skb = skb_prepare(d, f);
|
|
if (skb) {
|
|
skb->next = d->skblist;
|
|
d->skblist = skb;
|
|
}
|
|
}
|
|
|
|
/* enters with d->lock held */
|
|
void
|
|
aoecmd_work(struct aoedev *d)
|
|
{
|
|
struct frame *f;
|
|
struct buf *buf;
|
|
loop:
|
|
f = getframe(d, FREETAG);
|
|
if (f == NULL)
|
|
return;
|
|
if (d->inprocess == NULL) {
|
|
if (list_empty(&d->bufq))
|
|
return;
|
|
buf = container_of(d->bufq.next, struct buf, bufs);
|
|
list_del(d->bufq.next);
|
|
/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
|
|
d->inprocess = buf;
|
|
}
|
|
aoecmd_ata_rw(d, f);
|
|
goto loop;
|
|
}
|
|
|
|
static void
|
|
rexmit(struct aoedev *d, struct frame *f)
|
|
{
|
|
struct sk_buff *skb;
|
|
struct aoe_hdr *h;
|
|
char buf[128];
|
|
u32 n;
|
|
u32 net_tag;
|
|
|
|
n = newtag(d);
|
|
|
|
snprintf(buf, sizeof buf,
|
|
"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
|
|
"retransmit",
|
|
d->aoemajor, d->aoeminor, f->tag, jiffies, n);
|
|
aoechr_error(buf);
|
|
|
|
h = (struct aoe_hdr *) f->data;
|
|
f->tag = n;
|
|
net_tag = __cpu_to_be32(n);
|
|
memcpy(h->tag, &net_tag, sizeof net_tag);
|
|
|
|
skb = skb_prepare(d, f);
|
|
if (skb) {
|
|
skb->next = d->skblist;
|
|
d->skblist = skb;
|
|
}
|
|
}
|
|
|
|
static int
|
|
tsince(int tag)
|
|
{
|
|
int n;
|
|
|
|
n = jiffies & 0xffff;
|
|
n -= tag & 0xffff;
|
|
if (n < 0)
|
|
n += 1<<16;
|
|
return n;
|
|
}
|
|
|
|
static void
|
|
rexmit_timer(ulong vp)
|
|
{
|
|
struct aoedev *d;
|
|
struct frame *f, *e;
|
|
struct sk_buff *sl;
|
|
register long timeout;
|
|
ulong flags, n;
|
|
|
|
d = (struct aoedev *) vp;
|
|
sl = NULL;
|
|
|
|
/* timeout is always ~150% of the moving average */
|
|
timeout = d->rttavg;
|
|
timeout += timeout >> 1;
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
|
|
if (d->flags & DEVFL_TKILL) {
|
|
tdie: spin_unlock_irqrestore(&d->lock, flags);
|
|
return;
|
|
}
|
|
f = d->frames;
|
|
e = f + d->nframes;
|
|
for (; f<e; f++) {
|
|
if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
|
|
n = f->waited += timeout;
|
|
n /= HZ;
|
|
if (n > MAXWAIT) { /* waited too long. device failure. */
|
|
aoedev_downdev(d);
|
|
goto tdie;
|
|
}
|
|
rexmit(d, f);
|
|
}
|
|
}
|
|
|
|
sl = d->skblist;
|
|
d->skblist = NULL;
|
|
if (sl) {
|
|
n = d->rttavg <<= 1;
|
|
if (n > MAXTIMER)
|
|
d->rttavg = MAXTIMER;
|
|
}
|
|
|
|
d->timer.expires = jiffies + TIMERTICK;
|
|
add_timer(&d->timer);
|
|
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
|
|
aoenet_xmit(sl);
|
|
}
|
|
|
|
static void
|
|
ataid_complete(struct aoedev *d, unsigned char *id)
|
|
{
|
|
u64 ssize;
|
|
u16 n;
|
|
|
|
/* word 83: command set supported */
|
|
n = __le16_to_cpu(*((u16 *) &id[83<<1]));
|
|
|
|
/* word 86: command set/feature enabled */
|
|
n |= __le16_to_cpu(*((u16 *) &id[86<<1]));
|
|
|
|
if (n & (1<<10)) { /* bit 10: LBA 48 */
|
|
d->flags |= DEVFL_EXT;
|
|
|
|
/* word 100: number lba48 sectors */
|
|
ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));
|
|
|
|
/* set as in ide-disk.c:init_idedisk_capacity */
|
|
d->geo.cylinders = ssize;
|
|
d->geo.cylinders /= (255 * 63);
|
|
d->geo.heads = 255;
|
|
d->geo.sectors = 63;
|
|
} else {
|
|
d->flags &= ~DEVFL_EXT;
|
|
|
|
/* number lba28 sectors */
|
|
ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));
|
|
|
|
/* NOTE: obsolete in ATA 6 */
|
|
d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
|
|
d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
|
|
d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
|
|
}
|
|
d->ssize = ssize;
|
|
d->geo.start = 0;
|
|
if (d->gd != NULL) {
|
|
d->gd->capacity = ssize;
|
|
d->flags |= DEVFL_UP;
|
|
return;
|
|
}
|
|
if (d->flags & DEVFL_WORKON) {
|
|
printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
|
|
"(This really shouldn't happen).\n");
|
|
return;
|
|
}
|
|
INIT_WORK(&d->work, aoeblk_gdalloc, d);
|
|
schedule_work(&d->work);
|
|
d->flags |= DEVFL_WORKON;
|
|
}
|
|
|
|
static void
|
|
calc_rttavg(struct aoedev *d, int rtt)
|
|
{
|
|
register long n;
|
|
|
|
n = rtt;
|
|
if (n < MINTIMER)
|
|
n = MINTIMER;
|
|
else if (n > MAXTIMER)
|
|
n = MAXTIMER;
|
|
|
|
/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
|
|
n -= d->rttavg;
|
|
d->rttavg += n >> 2;
|
|
}
|
|
|
|
void
|
|
aoecmd_ata_rsp(struct sk_buff *skb)
|
|
{
|
|
struct aoedev *d;
|
|
struct aoe_hdr *hin;
|
|
struct aoe_atahdr *ahin, *ahout;
|
|
struct frame *f;
|
|
struct buf *buf;
|
|
struct sk_buff *sl;
|
|
register long n;
|
|
ulong flags;
|
|
char ebuf[128];
|
|
|
|
hin = (struct aoe_hdr *) skb->mac.raw;
|
|
d = aoedev_bymac(hin->src);
|
|
if (d == NULL) {
|
|
snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
|
|
"for unknown device %d.%d\n",
|
|
__be16_to_cpu(*((u16 *) hin->major)),
|
|
hin->minor);
|
|
aoechr_error(ebuf);
|
|
return;
|
|
}
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
|
|
f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
|
|
if (f == NULL) {
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
snprintf(ebuf, sizeof ebuf,
|
|
"%15s e%d.%d tag=%08x@%08lx\n",
|
|
"unexpected rsp",
|
|
__be16_to_cpu(*((u16 *) hin->major)),
|
|
hin->minor,
|
|
__be32_to_cpu(*((u32 *) hin->tag)),
|
|
jiffies);
|
|
aoechr_error(ebuf);
|
|
return;
|
|
}
|
|
|
|
calc_rttavg(d, tsince(f->tag));
|
|
|
|
ahin = (struct aoe_atahdr *) (hin+1);
|
|
ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
|
|
buf = f->buf;
|
|
|
|
if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
|
|
printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
|
|
"stat=%2.2Xh from e%ld.%ld\n",
|
|
ahout->cmdstat, ahin->cmdstat,
|
|
d->aoemajor, d->aoeminor);
|
|
if (buf)
|
|
buf->flags |= BUFFL_FAIL;
|
|
} else {
|
|
switch (ahout->cmdstat) {
|
|
case WIN_READ:
|
|
case WIN_READ_EXT:
|
|
n = ahout->scnt << 9;
|
|
if (skb->len - sizeof *hin - sizeof *ahin < n) {
|
|
printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
|
|
"ata data size in read. skb->len=%d\n",
|
|
skb->len);
|
|
/* fail frame f? just returning will rexmit. */
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
return;
|
|
}
|
|
memcpy(f->bufaddr, ahin+1, n);
|
|
case WIN_WRITE:
|
|
case WIN_WRITE_EXT:
|
|
break;
|
|
case WIN_IDENTIFY:
|
|
if (skb->len - sizeof *hin - sizeof *ahin < 512) {
|
|
printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
|
|
"in ataid. skb->len=%d\n", skb->len);
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
return;
|
|
}
|
|
ataid_complete(d, (char *) (ahin+1));
|
|
/* d->flags |= DEVFL_WC_UPDATE; */
|
|
break;
|
|
default:
|
|
printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
|
|
"outbound ata command %2.2Xh for %d.%d\n",
|
|
ahout->cmdstat,
|
|
__be16_to_cpu(*((u16 *) hin->major)),
|
|
hin->minor);
|
|
}
|
|
}
|
|
|
|
if (buf) {
|
|
buf->nframesout -= 1;
|
|
if (buf->nframesout == 0 && buf->resid == 0) {
|
|
n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
|
|
bio_endio(buf->bio, buf->bio->bi_size, n);
|
|
mempool_free(buf, d->bufpool);
|
|
}
|
|
}
|
|
|
|
f->buf = NULL;
|
|
f->tag = FREETAG;
|
|
|
|
aoecmd_work(d);
|
|
|
|
sl = d->skblist;
|
|
d->skblist = NULL;
|
|
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
|
|
aoenet_xmit(sl);
|
|
}
|
|
|
|
void
|
|
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
|
|
{
|
|
struct aoe_hdr *h;
|
|
struct aoe_cfghdr *ch;
|
|
struct sk_buff *skb, *sl;
|
|
struct net_device *ifp;
|
|
u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
|
|
u16 net_aoemajor = __cpu_to_be16(aoemajor);
|
|
|
|
sl = NULL;
|
|
|
|
read_lock(&dev_base_lock);
|
|
for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
|
|
dev_hold(ifp);
|
|
if (!is_aoe_netif(ifp))
|
|
continue;
|
|
|
|
skb = new_skb(ifp, sizeof *h + sizeof *ch);
|
|
if (skb == NULL) {
|
|
printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
|
|
continue;
|
|
}
|
|
h = (struct aoe_hdr *) skb->mac.raw;
|
|
memset(h, 0, sizeof *h + sizeof *ch);
|
|
|
|
memset(h->dst, 0xff, sizeof h->dst);
|
|
memcpy(h->src, ifp->dev_addr, sizeof h->src);
|
|
memcpy(h->type, &aoe_type, sizeof aoe_type);
|
|
h->verfl = AOE_HVER;
|
|
memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
|
|
h->minor = aoeminor;
|
|
h->cmd = AOECMD_CFG;
|
|
|
|
skb->next = sl;
|
|
sl = skb;
|
|
}
|
|
read_unlock(&dev_base_lock);
|
|
|
|
aoenet_xmit(sl);
|
|
}
|
|
|
|
/*
|
|
* Since we only call this in one place (and it only prepares one frame)
|
|
* we just return the skb. Usually we'd chain it up to the d->skblist.
|
|
*/
|
|
static struct sk_buff *
|
|
aoecmd_ata_id(struct aoedev *d)
|
|
{
|
|
struct aoe_hdr *h;
|
|
struct aoe_atahdr *ah;
|
|
struct frame *f;
|
|
struct sk_buff *skb;
|
|
|
|
f = getframe(d, FREETAG);
|
|
if (f == NULL) {
|
|
printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
|
|
"This shouldn't happen.\n");
|
|
return NULL;
|
|
}
|
|
|
|
/* initialize the headers & frame */
|
|
h = (struct aoe_hdr *) f->data;
|
|
ah = (struct aoe_atahdr *) (h+1);
|
|
f->ndata = sizeof *h + sizeof *ah;
|
|
memset(h, 0, f->ndata);
|
|
f->tag = aoehdr_atainit(d, h);
|
|
f->waited = 0;
|
|
f->writedatalen = 0;
|
|
|
|
/* this message initializes the device, so we reset the rttavg */
|
|
d->rttavg = MAXTIMER;
|
|
|
|
/* set up ata header */
|
|
ah->scnt = 1;
|
|
ah->cmdstat = WIN_IDENTIFY;
|
|
ah->lba3 = 0xa0;
|
|
|
|
skb = skb_prepare(d, f);
|
|
|
|
/* we now want to start the rexmit tracking */
|
|
d->flags &= ~DEVFL_TKILL;
|
|
d->timer.data = (ulong) d;
|
|
d->timer.function = rexmit_timer;
|
|
d->timer.expires = jiffies + TIMERTICK;
|
|
add_timer(&d->timer);
|
|
|
|
return skb;
|
|
}
|
|
|
|
void
|
|
aoecmd_cfg_rsp(struct sk_buff *skb)
|
|
{
|
|
struct aoedev *d;
|
|
struct aoe_hdr *h;
|
|
struct aoe_cfghdr *ch;
|
|
ulong flags, bufcnt, sysminor, aoemajor;
|
|
struct sk_buff *sl;
|
|
enum { MAXFRAMES = 8 };
|
|
|
|
h = (struct aoe_hdr *) skb->mac.raw;
|
|
ch = (struct aoe_cfghdr *) (h+1);
|
|
|
|
/*
|
|
* Enough people have their dip switches set backwards to
|
|
* warrant a loud message for this special case.
|
|
*/
|
|
aoemajor = __be16_to_cpu(*((u16 *) h->major));
|
|
if (aoemajor == 0xfff) {
|
|
printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
|
|
"address is all ones. Check shelf dip switches\n");
|
|
return;
|
|
}
|
|
|
|
sysminor = SYSMINOR(aoemajor, h->minor);
|
|
if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
|
|
printk(KERN_INFO
|
|
"aoe: e%ld.%d: minor number too large\n",
|
|
aoemajor, (int) h->minor);
|
|
return;
|
|
}
|
|
|
|
bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
|
|
if (bufcnt > MAXFRAMES) /* keep it reasonable */
|
|
bufcnt = MAXFRAMES;
|
|
|
|
d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
|
|
if (d == NULL) {
|
|
printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
|
|
return;
|
|
}
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
|
|
if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
return;
|
|
}
|
|
|
|
d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));
|
|
|
|
/* we get here only if the device is new */
|
|
sl = aoecmd_ata_id(d);
|
|
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
|
|
aoenet_xmit(sl);
|
|
}
|
|
|