kernel-fxtec-pro1x/arch/sparc64/kernel/pci_sun4v.c
David S. Miller ad7ad57c61 [SPARC64]: Fix conflicts in SBUS/PCI/EBUS/ISA DMA handling.
Fully unify all of the DMA ops so that subordinate bus types to
the DMA operation providers (such as ebus, isa, of_device) can
work transparently.

Basically, we just make sure that for every system device we
create, the dev->archdata 'iommu' and 'stc' fields are filled
in.

Then we have two platform variants of the DMA ops, one for SUN4U which
actually programs the real hardware, and one for SUN4V which makes
hypervisor calls.

This also fixes the crashes in parport_pc on sparc64, reported by
Meelis Roos.

Signed-off-by: David S. Miller <davem@davemloft.net>
2007-07-30 00:27:34 -07:00

1253 lines
29 KiB
C

/* pci_sun4v.c: SUN4V specific PCI controller support.
*
* Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/log2.h>
#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/upa.h>
#include <asm/pstate.h>
#include <asm/oplib.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>
#include "pci_impl.h"
#include "iommu_common.h"
#include "pci_sun4v.h"
static unsigned long vpci_major = 1;
static unsigned long vpci_minor = 1;
#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
struct iommu_batch {
struct device *dev; /* Device mapping is for. */
unsigned long prot; /* IOMMU page protections */
unsigned long entry; /* Index into IOTSB. */
u64 *pglist; /* List of physical pages */
unsigned long npages; /* Number of pages in list. */
};
static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
/* Interrupts must be disabled. */
static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
{
struct iommu_batch *p = &__get_cpu_var(iommu_batch);
p->dev = dev;
p->prot = prot;
p->entry = entry;
p->npages = 0;
}
/* Interrupts must be disabled. */
static long iommu_batch_flush(struct iommu_batch *p)
{
struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
unsigned long devhandle = pbm->devhandle;
unsigned long prot = p->prot;
unsigned long entry = p->entry;
u64 *pglist = p->pglist;
unsigned long npages = p->npages;
while (npages != 0) {
long num;
num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
npages, prot, __pa(pglist));
if (unlikely(num < 0)) {
if (printk_ratelimit())
printk("iommu_batch_flush: IOMMU map of "
"[%08lx:%08lx:%lx:%lx:%lx] failed with "
"status %ld\n",
devhandle, HV_PCI_TSBID(0, entry),
npages, prot, __pa(pglist), num);
return -1;
}
entry += num;
npages -= num;
pglist += num;
}
p->entry = entry;
p->npages = 0;
return 0;
}
/* Interrupts must be disabled. */
static inline long iommu_batch_add(u64 phys_page)
{
struct iommu_batch *p = &__get_cpu_var(iommu_batch);
BUG_ON(p->npages >= PGLIST_NENTS);
p->pglist[p->npages++] = phys_page;
if (p->npages == PGLIST_NENTS)
return iommu_batch_flush(p);
return 0;
}
/* Interrupts must be disabled. */
static inline long iommu_batch_end(void)
{
struct iommu_batch *p = &__get_cpu_var(iommu_batch);
BUG_ON(p->npages >= PGLIST_NENTS);
return iommu_batch_flush(p);
}
static long arena_alloc(struct iommu_arena *arena, unsigned long npages)
{
unsigned long n, i, start, end, limit;
int pass;
limit = arena->limit;
start = arena->hint;
pass = 0;
again:
n = find_next_zero_bit(arena->map, limit, start);
end = n + npages;
if (unlikely(end >= limit)) {
if (likely(pass < 1)) {
limit = start;
start = 0;
pass++;
goto again;
} else {
/* Scanned the whole thing, give up. */
return -1;
}
}
for (i = n; i < end; i++) {
if (test_bit(i, arena->map)) {
start = i + 1;
goto again;
}
}
for (i = n; i < end; i++)
__set_bit(i, arena->map);
arena->hint = end;
return n;
}
static void arena_free(struct iommu_arena *arena, unsigned long base,
unsigned long npages)
{
unsigned long i;
for (i = base; i < (base + npages); i++)
__clear_bit(i, arena->map);
}
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp)
{
struct iommu *iommu;
unsigned long flags, order, first_page, npages, n;
void *ret;
long entry;
size = IO_PAGE_ALIGN(size);
order = get_order(size);
if (unlikely(order >= MAX_ORDER))
return NULL;
npages = size >> IO_PAGE_SHIFT;
first_page = __get_free_pages(gfp, order);
if (unlikely(first_page == 0UL))
return NULL;
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags);
entry = arena_alloc(&iommu->arena, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(entry < 0L))
goto arena_alloc_fail;
*dma_addrp = (iommu->page_table_map_base +
(entry << IO_PAGE_SHIFT));
ret = (void *) first_page;
first_page = __pa(first_page);
local_irq_save(flags);
iommu_batch_start(dev,
(HV_PCI_MAP_ATTR_READ |
HV_PCI_MAP_ATTR_WRITE),
entry);
for (n = 0; n < npages; n++) {
long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
if (unlikely(err < 0L))
goto iommu_map_fail;
}
if (unlikely(iommu_batch_end() < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
return ret;
iommu_map_fail:
/* Interrupts are disabled. */
spin_lock(&iommu->lock);
arena_free(&iommu->arena, entry, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
arena_alloc_fail:
free_pages(first_page, order);
return NULL;
}
static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
dma_addr_t dvma)
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
unsigned long flags, order, npages, entry;
u32 devhandle;
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
devhandle = pbm->devhandle;
entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
spin_lock_irqsave(&iommu->lock, flags);
arena_free(&iommu->arena, entry, npages);
do {
unsigned long num;
num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
npages);
entry += num;
npages -= num;
} while (npages != 0);
spin_unlock_irqrestore(&iommu->lock, flags);
order = get_order(size);
if (order < 10)
free_pages((unsigned long)cpu, order);
}
static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
enum dma_data_direction direction)
{
struct iommu *iommu;
unsigned long flags, npages, oaddr;
unsigned long i, base_paddr;
u32 bus_addr, ret;
unsigned long prot;
long entry;
iommu = dev->archdata.iommu;
if (unlikely(direction == DMA_NONE))
goto bad;
oaddr = (unsigned long)ptr;
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags);
entry = arena_alloc(&iommu->arena, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(entry < 0L))
goto bad;
bus_addr = (iommu->page_table_map_base +
(entry << IO_PAGE_SHIFT));
ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
base_paddr = __pa(oaddr & IO_PAGE_MASK);
prot = HV_PCI_MAP_ATTR_READ;
if (direction != DMA_TO_DEVICE)
prot |= HV_PCI_MAP_ATTR_WRITE;
local_irq_save(flags);
iommu_batch_start(dev, prot, entry);
for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
long err = iommu_batch_add(base_paddr);
if (unlikely(err < 0L))
goto iommu_map_fail;
}
if (unlikely(iommu_batch_end() < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
return ret;
bad:
if (printk_ratelimit())
WARN_ON(1);
return DMA_ERROR_CODE;
iommu_map_fail:
/* Interrupts are disabled. */
spin_lock(&iommu->lock);
arena_free(&iommu->arena, entry, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
return DMA_ERROR_CODE;
}
static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
size_t sz, enum dma_data_direction direction)
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
unsigned long flags, npages;
long entry;
u32 devhandle;
if (unlikely(direction == DMA_NONE)) {
if (printk_ratelimit())
WARN_ON(1);
return;
}
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
devhandle = pbm->devhandle;
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
bus_addr &= IO_PAGE_MASK;
spin_lock_irqsave(&iommu->lock, flags);
entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
arena_free(&iommu->arena, entry, npages);
do {
unsigned long num;
num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
npages);
entry += num;
npages -= num;
} while (npages != 0);
spin_unlock_irqrestore(&iommu->lock, flags);
}
#define SG_ENT_PHYS_ADDRESS(SG) \
(__pa(page_address((SG)->page)) + (SG)->offset)
static inline long fill_sg(long entry, struct device *dev,
struct scatterlist *sg,
int nused, int nelems, unsigned long prot)
{
struct scatterlist *dma_sg = sg;
struct scatterlist *sg_end = sg + nelems;
unsigned long flags;
int i;
local_irq_save(flags);
iommu_batch_start(dev, prot, entry);
for (i = 0; i < nused; i++) {
unsigned long pteval = ~0UL;
u32 dma_npages;
dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
dma_sg->dma_length +
((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
do {
unsigned long offset;
signed int len;
/* If we are here, we know we have at least one
* more page to map. So walk forward until we
* hit a page crossing, and begin creating new
* mappings from that spot.
*/
for (;;) {
unsigned long tmp;
tmp = SG_ENT_PHYS_ADDRESS(sg);
len = sg->length;
if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
pteval = tmp & IO_PAGE_MASK;
offset = tmp & (IO_PAGE_SIZE - 1UL);
break;
}
if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
offset = 0UL;
len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
break;
}
sg++;
}
pteval = (pteval & IOPTE_PAGE);
while (len > 0) {
long err;
err = iommu_batch_add(pteval);
if (unlikely(err < 0L))
goto iommu_map_failed;
pteval += IO_PAGE_SIZE;
len -= (IO_PAGE_SIZE - offset);
offset = 0;
dma_npages--;
}
pteval = (pteval & IOPTE_PAGE) + len;
sg++;
/* Skip over any tail mappings we've fully mapped,
* adjusting pteval along the way. Stop when we
* detect a page crossing event.
*/
while (sg < sg_end &&
(pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
(pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
((pteval ^
(SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
pteval += sg->length;
sg++;
}
if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
pteval = ~0UL;
} while (dma_npages != 0);
dma_sg++;
}
if (unlikely(iommu_batch_end() < 0L))
goto iommu_map_failed;
local_irq_restore(flags);
return 0;
iommu_map_failed:
local_irq_restore(flags);
return -1L;
}
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
struct iommu *iommu;
unsigned long flags, npages, prot;
u32 dma_base;
struct scatterlist *sgtmp;
long entry, err;
int used;
/* Fast path single entry scatterlists. */
if (nelems == 1) {
sglist->dma_address =
dma_4v_map_single(dev,
(page_address(sglist->page) +
sglist->offset),
sglist->length, direction);
if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
return 0;
sglist->dma_length = sglist->length;
return 1;
}
iommu = dev->archdata.iommu;
if (unlikely(direction == DMA_NONE))
goto bad;
/* Step 1: Prepare scatter list. */
npages = prepare_sg(sglist, nelems);
/* Step 2: Allocate a cluster and context, if necessary. */
spin_lock_irqsave(&iommu->lock, flags);
entry = arena_alloc(&iommu->arena, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(entry < 0L))
goto bad;
dma_base = iommu->page_table_map_base +
(entry << IO_PAGE_SHIFT);
/* Step 3: Normalize DMA addresses. */
used = nelems;
sgtmp = sglist;
while (used && sgtmp->dma_length) {
sgtmp->dma_address += dma_base;
sgtmp++;
used--;
}
used = nelems - used;
/* Step 4: Create the mappings. */
prot = HV_PCI_MAP_ATTR_READ;
if (direction != DMA_TO_DEVICE)
prot |= HV_PCI_MAP_ATTR_WRITE;
err = fill_sg(entry, dev, sglist, used, nelems, prot);
if (unlikely(err < 0L))
goto iommu_map_failed;
return used;
bad:
if (printk_ratelimit())
WARN_ON(1);
return 0;
iommu_map_failed:
spin_lock_irqsave(&iommu->lock, flags);
arena_free(&iommu->arena, entry, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
unsigned long flags, i, npages;
long entry;
u32 devhandle, bus_addr;
if (unlikely(direction == DMA_NONE)) {
if (printk_ratelimit())
WARN_ON(1);
}
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
devhandle = pbm->devhandle;
bus_addr = sglist->dma_address & IO_PAGE_MASK;
for (i = 1; i < nelems; i++)
if (sglist[i].dma_length == 0)
break;
i--;
npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
bus_addr) >> IO_PAGE_SHIFT;
entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
spin_lock_irqsave(&iommu->lock, flags);
arena_free(&iommu->arena, entry, npages);
do {
unsigned long num;
num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
npages);
entry += num;
npages -= num;
} while (npages != 0);
spin_unlock_irqrestore(&iommu->lock, flags);
}
static void dma_4v_sync_single_for_cpu(struct device *dev,
dma_addr_t bus_addr, size_t sz,
enum dma_data_direction direction)
{
/* Nothing to do... */
}
static void dma_4v_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sglist, int nelems,
enum dma_data_direction direction)
{
/* Nothing to do... */
}
const struct dma_ops sun4v_dma_ops = {
.alloc_coherent = dma_4v_alloc_coherent,
.free_coherent = dma_4v_free_coherent,
.map_single = dma_4v_map_single,
.unmap_single = dma_4v_unmap_single,
.map_sg = dma_4v_map_sg,
.unmap_sg = dma_4v_unmap_sg,
.sync_single_for_cpu = dma_4v_sync_single_for_cpu,
.sync_sg_for_cpu = dma_4v_sync_sg_for_cpu,
};
static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm)
{
struct property *prop;
struct device_node *dp;
dp = pbm->prom_node;
prop = of_find_property(dp, "66mhz-capable", NULL);
pbm->is_66mhz_capable = (prop != NULL);
pbm->pci_bus = pci_scan_one_pbm(pbm);
/* XXX register error interrupt handlers XXX */
}
static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
struct iommu *iommu)
{
struct iommu_arena *arena = &iommu->arena;
unsigned long i, cnt = 0;
u32 devhandle;
devhandle = pbm->devhandle;
for (i = 0; i < arena->limit; i++) {
unsigned long ret, io_attrs, ra;
ret = pci_sun4v_iommu_getmap(devhandle,
HV_PCI_TSBID(0, i),
&io_attrs, &ra);
if (ret == HV_EOK) {
if (page_in_phys_avail(ra)) {
pci_sun4v_iommu_demap(devhandle,
HV_PCI_TSBID(0, i), 1);
} else {
cnt++;
__set_bit(i, arena->map);
}
}
}
return cnt;
}
static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
struct iommu *iommu = pbm->iommu;
struct property *prop;
unsigned long num_tsb_entries, sz, tsbsize;
u32 vdma[2], dma_mask, dma_offset;
prop = of_find_property(pbm->prom_node, "virtual-dma", NULL);
if (prop) {
u32 *val = prop->value;
vdma[0] = val[0];
vdma[1] = val[1];
} else {
/* No property, use default values. */
vdma[0] = 0x80000000;
vdma[1] = 0x80000000;
}
if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
prom_printf("PCI-SUN4V: strange virtual-dma[%08x:%08x].\n",
vdma[0], vdma[1]);
prom_halt();
};
dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
tsbsize = num_tsb_entries * sizeof(iopte_t);
dma_offset = vdma[0];
/* Setup initial software IOMMU state. */
spin_lock_init(&iommu->lock);
iommu->ctx_lowest_free = 1;
iommu->page_table_map_base = dma_offset;
iommu->dma_addr_mask = dma_mask;
/* Allocate and initialize the free area map. */
sz = (num_tsb_entries + 7) / 8;
sz = (sz + 7UL) & ~7UL;
iommu->arena.map = kzalloc(sz, GFP_KERNEL);
if (!iommu->arena.map) {
prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
prom_halt();
}
iommu->arena.limit = num_tsb_entries;
sz = probe_existing_entries(pbm, iommu);
if (sz)
printk("%s: Imported %lu TSB entries from OBP\n",
pbm->name, sz);
}
#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
u64 version_type;
#define MSIQ_VERSION_MASK 0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT 32
#define MSIQ_TYPE_MASK 0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT 0
#define MSIQ_TYPE_NONE 0x00
#define MSIQ_TYPE_MSG 0x01
#define MSIQ_TYPE_MSI32 0x02
#define MSIQ_TYPE_MSI64 0x03
#define MSIQ_TYPE_INTX 0x08
#define MSIQ_TYPE_NONE2 0xff
u64 intx_sysino;
u64 reserved1;
u64 stick;
u64 req_id; /* bus/device/func */
#define MSIQ_REQID_BUS_MASK 0xff00UL
#define MSIQ_REQID_BUS_SHIFT 8
#define MSIQ_REQID_DEVICE_MASK 0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT 3
#define MSIQ_REQID_FUNC_MASK 0x0007UL
#define MSIQ_REQID_FUNC_SHIFT 0
u64 msi_address;
/* The format of this value is message type dependent.
* For MSI bits 15:0 are the data from the MSI packet.
* For MSI-X bits 31:0 are the data from the MSI packet.
* For MSG, the message code and message routing code where:
* bits 39:32 is the bus/device/fn of the msg target-id
* bits 18:16 is the message routing code
* bits 7:0 is the message code
* For INTx the low order 2-bits are:
* 00 - INTA
* 01 - INTB
* 10 - INTC
* 11 - INTD
*/
u64 msi_data;
u64 reserved2;
};
/* For now this just runs as a pre-handler for the real interrupt handler.
* So we just walk through the queue and ACK all the entries, update the
* head pointer, and return.
*
* In the longer term it would be nice to do something more integrated
* wherein we can pass in some of this MSI info to the drivers. This
* would be most useful for PCIe fabric error messages, although we could
* invoke those directly from the loop here in order to pass the info around.
*/
static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2)
{
struct pci_pbm_info *pbm = data1;
struct pci_sun4v_msiq_entry *base, *ep;
unsigned long msiqid, orig_head, head, type, err;
msiqid = (unsigned long) data2;
head = 0xdeadbeef;
err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head);
if (unlikely(err))
goto hv_error_get;
if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))))
goto bad_offset;
head /= sizeof(struct pci_sun4v_msiq_entry);
orig_head = head;
base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
(pbm->msiq_ent_count *
sizeof(struct pci_sun4v_msiq_entry))));
ep = &base[head];
while ((ep->version_type & MSIQ_TYPE_MASK) != 0) {
type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
if (unlikely(type != MSIQ_TYPE_MSI32 &&
type != MSIQ_TYPE_MSI64))
goto bad_type;
pci_sun4v_msi_setstate(pbm->devhandle,
ep->msi_data /* msi_num */,
HV_MSISTATE_IDLE);
/* Clear the entry. */
ep->version_type &= ~MSIQ_TYPE_MASK;
/* Go to next entry in ring. */
head++;
if (head >= pbm->msiq_ent_count)
head = 0;
ep = &base[head];
}
if (likely(head != orig_head)) {
/* ACK entries by updating head pointer. */
head *= sizeof(struct pci_sun4v_msiq_entry);
err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
if (unlikely(err))
goto hv_error_set;
}
return;
hv_error_set:
printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err);
goto hv_error_cont;
hv_error_get:
printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err);
hv_error_cont:
printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
pbm->devhandle, msiqid, head);
return;
bad_offset:
printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n",
head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry));
return;
bad_type:
printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
return;
}
static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
{
unsigned long size, bits_per_ulong;
bits_per_ulong = sizeof(unsigned long) * 8;
size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
size /= 8;
BUG_ON(size % sizeof(unsigned long));
pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
if (!pbm->msi_bitmap)
return -ENOMEM;
return 0;
}
static void msi_bitmap_free(struct pci_pbm_info *pbm)
{
kfree(pbm->msi_bitmap);
pbm->msi_bitmap = NULL;
}
static int msi_queue_alloc(struct pci_pbm_info *pbm)
{
unsigned long q_size, alloc_size, pages, order;
int i;
q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
alloc_size = (pbm->msiq_num * q_size);
order = get_order(alloc_size);
pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
if (pages == 0UL) {
printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
order);
return -ENOMEM;
}
memset((char *)pages, 0, PAGE_SIZE << order);
pbm->msi_queues = (void *) pages;
for (i = 0; i < pbm->msiq_num; i++) {
unsigned long err, base = __pa(pages + (i * q_size));
unsigned long ret1, ret2;
err = pci_sun4v_msiq_conf(pbm->devhandle,
pbm->msiq_first + i,
base, pbm->msiq_ent_count);
if (err) {
printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
err);
goto h_error;
}
err = pci_sun4v_msiq_info(pbm->devhandle,
pbm->msiq_first + i,
&ret1, &ret2);
if (err) {
printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
err);
goto h_error;
}
if (ret1 != base || ret2 != pbm->msiq_ent_count) {
printk(KERN_ERR "MSI: Bogus qconf "
"expected[%lx:%x] got[%lx:%lx]\n",
base, pbm->msiq_ent_count,
ret1, ret2);
goto h_error;
}
}
return 0;
h_error:
free_pages(pages, order);
return -EINVAL;
}
static int alloc_msi(struct pci_pbm_info *pbm)
{
int i;
for (i = 0; i < pbm->msi_num; i++) {
if (!test_and_set_bit(i, pbm->msi_bitmap))
return i + pbm->msi_first;
}
return -ENOENT;
}
static void free_msi(struct pci_pbm_info *pbm, int msi_num)
{
msi_num -= pbm->msi_first;
clear_bit(msi_num, pbm->msi_bitmap);
}
static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p,
struct pci_dev *pdev,
struct msi_desc *entry)
{
struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
unsigned long devino, msiqid;
struct msi_msg msg;
int msi_num, err;
*virt_irq_p = 0;
msi_num = alloc_msi(pbm);
if (msi_num < 0)
return msi_num;
devino = sun4v_build_msi(pbm->devhandle, virt_irq_p,
pbm->msiq_first_devino,
(pbm->msiq_first_devino +
pbm->msiq_num));
err = -ENOMEM;
if (!devino)
goto out_err;
msiqid = ((devino - pbm->msiq_first_devino) +
pbm->msiq_first);
err = -EINVAL;
if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
if (err)
goto out_err;
if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
goto out_err;
if (pci_sun4v_msi_setmsiq(pbm->devhandle,
msi_num, msiqid,
(entry->msi_attrib.is_64 ?
HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
goto out_err;
if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE))
goto out_err;
if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
goto out_err;
pdev->dev.archdata.msi_num = msi_num;
if (entry->msi_attrib.is_64) {
msg.address_hi = pbm->msi64_start >> 32;
msg.address_lo = pbm->msi64_start & 0xffffffff;
} else {
msg.address_hi = 0;
msg.address_lo = pbm->msi32_start;
}
msg.data = msi_num;
set_irq_msi(*virt_irq_p, entry);
write_msi_msg(*virt_irq_p, &msg);
irq_install_pre_handler(*virt_irq_p,
pci_sun4v_msi_prehandler,
pbm, (void *) msiqid);
return 0;
out_err:
free_msi(pbm, msi_num);
sun4v_destroy_msi(*virt_irq_p);
*virt_irq_p = 0;
return err;
}
static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq,
struct pci_dev *pdev)
{
struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
unsigned long msiqid, err;
unsigned int msi_num;
msi_num = pdev->dev.archdata.msi_num;
err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid);
if (err) {
printk(KERN_ERR "%s: getmsiq gives error %lu\n",
pbm->name, err);
return;
}
pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_INVALID);
pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_INVALID);
free_msi(pbm, msi_num);
/* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ
* allocation.
*/
sun4v_destroy_msi(virt_irq);
}
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
const u32 *val;
int len;
val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
if (!val || len != 4)
goto no_msi;
pbm->msiq_num = *val;
if (pbm->msiq_num) {
const struct msiq_prop {
u32 first_msiq;
u32 num_msiq;
u32 first_devino;
} *mqp;
const struct msi_range_prop {
u32 first_msi;
u32 num_msi;
} *mrng;
const struct addr_range_prop {
u32 msi32_high;
u32 msi32_low;
u32 msi32_len;
u32 msi64_high;
u32 msi64_low;
u32 msi64_len;
} *arng;
val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
if (!val || len != 4)
goto no_msi;
pbm->msiq_ent_count = *val;
mqp = of_get_property(pbm->prom_node,
"msi-eq-to-devino", &len);
if (!mqp || len != sizeof(struct msiq_prop))
goto no_msi;
pbm->msiq_first = mqp->first_msiq;
pbm->msiq_first_devino = mqp->first_devino;
val = of_get_property(pbm->prom_node, "#msi", &len);
if (!val || len != 4)
goto no_msi;
pbm->msi_num = *val;
mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
if (!mrng || len != sizeof(struct msi_range_prop))
goto no_msi;
pbm->msi_first = mrng->first_msi;
val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
if (!val || len != 4)
goto no_msi;
pbm->msi_data_mask = *val;
val = of_get_property(pbm->prom_node, "msix-data-width", &len);
if (!val || len != 4)
goto no_msi;
pbm->msix_data_width = *val;
arng = of_get_property(pbm->prom_node, "msi-address-ranges",
&len);
if (!arng || len != sizeof(struct addr_range_prop))
goto no_msi;
pbm->msi32_start = ((u64)arng->msi32_high << 32) |
(u64) arng->msi32_low;
pbm->msi64_start = ((u64)arng->msi64_high << 32) |
(u64) arng->msi64_low;
pbm->msi32_len = arng->msi32_len;
pbm->msi64_len = arng->msi64_len;
if (msi_bitmap_alloc(pbm))
goto no_msi;
if (msi_queue_alloc(pbm)) {
msi_bitmap_free(pbm);
goto no_msi;
}
printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
"devino[0x%x]\n",
pbm->name,
pbm->msiq_first, pbm->msiq_num,
pbm->msiq_ent_count,
pbm->msiq_first_devino);
printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
"width[%u]\n",
pbm->name,
pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
pbm->msix_data_width);
printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
"addr64[0x%lx:0x%x]\n",
pbm->name,
pbm->msi32_start, pbm->msi32_len,
pbm->msi64_start, pbm->msi64_len);
printk(KERN_INFO "%s: MSI queues at RA [%p]\n",
pbm->name,
pbm->msi_queues);
}
pbm->setup_msi_irq = pci_sun4v_setup_msi_irq;
pbm->teardown_msi_irq = pci_sun4v_teardown_msi_irq;
return;
no_msi:
pbm->msiq_num = 0;
printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */
static void __init pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle)
{
struct pci_pbm_info *pbm;
if (devhandle & 0x40)
pbm = &p->pbm_B;
else
pbm = &p->pbm_A;
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
pbm->scan_bus = pci_sun4v_scan_bus;
pbm->pci_ops = &sun4v_pci_ops;
pbm->config_space_reg_bits = 12;
pbm->index = pci_num_pbms++;
pbm->parent = p;
pbm->prom_node = dp;
pbm->devhandle = devhandle;
pbm->name = dp->full_name;
printk("%s: SUN4V PCI Bus Module\n", pbm->name);
pci_determine_mem_io_space(pbm);
pci_get_pbm_props(pbm);
pci_sun4v_iommu_init(pbm);
pci_sun4v_msi_init(pbm);
}
void __init sun4v_pci_init(struct device_node *dp, char *model_name)
{
static int hvapi_negotiated = 0;
struct pci_controller_info *p;
struct pci_pbm_info *pbm;
struct iommu *iommu;
struct property *prop;
struct linux_prom64_registers *regs;
u32 devhandle;
int i;
if (!hvapi_negotiated++) {
int err = sun4v_hvapi_register(HV_GRP_PCI,
vpci_major,
&vpci_minor);
if (err) {
prom_printf("SUN4V_PCI: Could not register hvapi, "
"err=%d\n", err);
prom_halt();
}
printk("SUN4V_PCI: Registered hvapi major[%lu] minor[%lu]\n",
vpci_major, vpci_minor);
dma_ops = &sun4v_dma_ops;
}
prop = of_find_property(dp, "reg", NULL);
regs = prop->value;
devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;
for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
if (pbm->devhandle == (devhandle ^ 0x40)) {
pci_sun4v_pbm_init(pbm->parent, dp, devhandle);
return;
}
}
for_each_possible_cpu(i) {
unsigned long page = get_zeroed_page(GFP_ATOMIC);
if (!page)
goto fatal_memory_error;
per_cpu(iommu_batch, i).pglist = (u64 *) page;
}
p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
if (!p)
goto fatal_memory_error;
iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
if (!iommu)
goto fatal_memory_error;
p->pbm_A.iommu = iommu;
iommu = kzalloc(sizeof(struct iommu), GFP_ATOMIC);
if (!iommu)
goto fatal_memory_error;
p->pbm_B.iommu = iommu;
/* Like PSYCHO and SCHIZO we have a 2GB aligned area
* for memory space.
*/
pci_memspace_mask = 0x7fffffffUL;
pci_sun4v_pbm_init(p, dp, devhandle);
return;
fatal_memory_error:
prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");
prom_halt();
}