From 65eef33550f68e9a7f7d2dc64da94fb6cb85be2c Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Tue, 14 Jan 2014 15:02:11 +0100
Subject: [PATCH 01/54] KVM: s390: Adding skey bit to mmu context

For lazy storage key handling, we need a mechanism to track if the
process ever issued a storage key operation.

This patch adds the basic infrastructure for making the storage
key handling optional, but still leaves it enabled for now by default.
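
The intended lazy scheme, sketched for illustration (this patch still
defaults use_skey to 1; the 0 default and the enable path follow later
in this series):

/* default: process never issued a skey op, skey helpers are no-ops */
mm->context.use_skey = 0;

/* first guest skey instruction (ISKE/SSKE/RRBE) is intercepted: */
mm->context.use_skey = 1;	/* from now on keys are maintained */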

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/mmu.h         |  2 ++
 arch/s390/include/asm/mmu_context.h |  1 +
 arch/s390/include/asm/pgtable.h     | 41 +++++++++++++++++++----------
 3 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index f77695a82f64..a5e656260a70 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@ typedef struct {
 	unsigned long vdso_base;
 	/* The mmu context has extended page tables. */
 	unsigned int has_pgste:1;
+	/* The mmu context uses storage keys. */
+	unsigned int use_skey:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						      \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 71be346d0e3c..05925ead0748 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
 	mm->context.has_pgste = 0;
+	mm->context.use_skey = 1;
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 12f75313e086..e88e9f6b07cc 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -466,6 +466,16 @@ static inline int mm_has_pgste(struct mm_struct *mm)
 #endif
 	return 0;
 }
+
+static inline int mm_use_skey(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (mm->context.use_skey)
+		return 1;
+#endif
+	return 0;
+}
+
 /*
  * pgd/pmd/pte query functions
  */
@@ -699,12 +709,13 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
 #endif
 }
 
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
+				       struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
 	unsigned long address, bits, skey;
 
-	if (pte_val(*ptep) & _PAGE_INVALID)
+	if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
 		return pgste;
 	address = pte_val(*ptep) & PAGE_MASK;
 	skey = (unsigned long) page_get_storage_key(address);
@@ -729,10 +740,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 
 }
 
-static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste,
+					 struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
-	if (pte_val(*ptep) & _PAGE_INVALID)
+	if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
 		return pgste;
 	/* Get referenced bit from storage key */
 	if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
@@ -741,13 +753,14 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
 	return pgste;
 }
 
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+				 struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
 	unsigned long address;
 	unsigned long nkey;
 
-	if (pte_val(entry) & _PAGE_INVALID)
+	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
 		return;
 	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
 	address = pte_val(entry) & PAGE_MASK;
@@ -870,7 +883,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
-		pgste_set_key(ptep, pgste, entry);
+		pgste_set_key(ptep, pgste, entry, mm);
 		pgste_set_pte(ptep, entry);
 		pgste_set_unlock(ptep, pgste);
 	} else {
@@ -1028,7 +1041,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_update_all(ptep, pgste);
+		pgste = pgste_update_all(ptep, pgste, mm);
 		dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
 		pgste_val(pgste) &= ~PGSTE_HC_BIT;
 		pgste_set_unlock(ptep, pgste);
@@ -1048,7 +1061,7 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_update_young(ptep, pgste);
+		pgste = pgste_update_young(ptep, pgste, mm);
 		young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
 		pgste_val(pgste) &= ~PGSTE_HR_BIT;
 		pgste_set_unlock(ptep, pgste);
@@ -1182,7 +1195,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	pte_val(*ptep) = _PAGE_INVALID;
 
 	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, mm);
 		pgste_set_unlock(ptep, pgste);
 	}
 	return pte;
@@ -1205,7 +1218,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
 	ptep_flush_lazy(mm, address, ptep);
 
 	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, mm);
 		pgste_set(ptep, pgste);
 	}
 	return pte;
@@ -1219,7 +1232,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get(ptep);
-		pgste_set_key(ptep, pgste, pte);
+		pgste_set_key(ptep, pgste, pte, mm);
 		pgste_set_pte(ptep, pte);
 		pgste_set_unlock(ptep, pgste);
 	} else
@@ -1246,7 +1259,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 		if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
 		    _PGSTE_GPS_USAGE_UNUSED)
 			pte_val(pte) |= _PAGE_UNUSED;
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
 		pgste_set_unlock(ptep, pgste);
 	}
 	return pte;
@@ -1278,7 +1291,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	pte_val(*ptep) = _PAGE_INVALID;
 
 	if (!full && mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, mm);
 		pgste_set_unlock(ptep, pgste);
 	}
 	return pte;

From d4cb11340be6a1613d40d2b546cb111ea2547066 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Wed, 29 Jan 2014 16:02:32 +0100
Subject: [PATCH 02/54] KVM: s390: Clear storage keys

page_table_reset_pgste() already does a complete page table walk to
reset the pgste. Enhance it to initialize the storage keys to
PAGE_DEFAULT_KEY if requested by the caller. This will be used
for lazy storage key handling. Also provide an empty stub for
!CONFIG_PGSTE.

Let's adapt the current code (diag 308) to not clear the keys.
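
The resulting caller semantics, sketched for illustration:

/* reset only the pgste usage state, keep the storage keys (diag 308) */
page_table_reset_pgste(current->mm, 0, TASK_SIZE, false);

/* additionally set all mapped, writable pages to PAGE_DEFAULT_KEY,
 * as the lazy storage key enablement will do later in this series */
page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);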

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/pgalloc.h |  3 ++-
 arch/s390/kvm/diag.c            |  6 ++---
 arch/s390/mm/pgtable.c          | 43 ++++++++++++++++++++++++---------
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 884017cbfa9f..9e18a61d3df3 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,7 +22,8 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *);
 
-void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long);
+void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
+			    bool init_skey);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq);
 
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 08dfc839a6cf..44dcfa8860b5 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -169,15 +169,15 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 	switch (subcode) {
 	case 0:
 	case 1:
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE);
+		page_table_reset_pgste(current->mm, 0, TASK_SIZE, false);
 		return -EOPNOTSUPP;
 	case 3:
 		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE);
+		page_table_reset_pgste(current->mm, 0, TASK_SIZE, false);
 		break;
 	case 4:
 		vcpu->run->s390_reset_flags = 0;
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE);
+		page_table_reset_pgste(current->mm, 0, TASK_SIZE, false);
 		break;
 	default:
 		return -EOPNOTSUPP;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index d7cfd57815fb..be80f55a1b78 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -883,8 +883,8 @@ static inline void page_table_free_pgste(unsigned long *table)
 	__free_page(page);
 }
 
-static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
-			pmd_t *pmd, unsigned long addr, unsigned long end)
+static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
+			unsigned long addr, unsigned long end, bool init_skey)
 {
 	pte_t *start_pte, *pte;
 	spinlock_t *ptl;
@@ -895,6 +895,22 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
 	do {
 		pgste = pgste_get_lock(pte);
 		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+		if (init_skey) {
+			unsigned long address;
+
+			pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+					      PGSTE_GR_BIT | PGSTE_GC_BIT);
+
+			/* skip invalid and not writable pages */
+			if (pte_val(*pte) & _PAGE_INVALID ||
+			    !(pte_val(*pte) & _PAGE_WRITE)) {
+				pgste_set_unlock(pte, pgste);
+				continue;
+			}
+
+			address = pte_val(*pte) & PAGE_MASK;
+			page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
+		}
 		pgste_set_unlock(pte, pgste);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(start_pte, ptl);
@@ -902,8 +918,8 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
 	return addr;
 }
 
-static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
-			pud_t *pud, unsigned long addr, unsigned long end)
+static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
+			unsigned long addr, unsigned long end, bool init_skey)
 {
 	unsigned long next;
 	pmd_t *pmd;
@@ -913,14 +929,14 @@ static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		next = page_table_reset_pte(mm, pmd, addr, next);
+		next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
 	} while (pmd++, addr = next, addr != end);
 
 	return addr;
 }
 
-static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
-			pgd_t *pgd, unsigned long addr, unsigned long end)
+static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
+			unsigned long addr, unsigned long end, bool init_skey)
 {
 	unsigned long next;
 	pud_t *pud;
@@ -930,14 +946,14 @@ static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		next = page_table_reset_pmd(mm, pud, addr, next);
+		next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
 	} while (pud++, addr = next, addr != end);
 
 	return addr;
 }
 
-void page_table_reset_pgste(struct mm_struct *mm,
-			unsigned long start, unsigned long end)
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool init_skey)
 {
 	unsigned long addr, next;
 	pgd_t *pgd;
@@ -949,7 +965,7 @@ void page_table_reset_pgste(struct mm_struct *mm,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		next = page_table_reset_pud(mm, pgd, addr, next);
+		next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
 	} while (pgd++, addr = next, addr != end);
 	up_read(&mm->mmap_sem);
 }
@@ -1011,6 +1027,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 	return NULL;
 }
 
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool init_skey)
+{
+}
+
 static inline void page_table_free_pgste(unsigned long *table)
 {
 }

From 934bc131efc3e4be6a52f7dd6c4dbf99635e381a Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Tue, 14 Jan 2014 18:10:17 +0100
Subject: [PATCH 03/54] KVM: s390: Allow skeys to be enabled for the current
 process

Introduce a new function s390_enable_skey(), which enables storage key
handling via setting the use_skey flag in the mmu context.

This function is only useful within the context of kvm.

Note that enabling storage keys will cause a one-time hiccup when
walking the page table; however, it saves us special effort for cases
like clear reset while making it possible for us to be architecture
compliant.

s390_enable_skey() takes the page table lock to prevent multiple
vcpus from resetting the storage keys concurrently.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h |  1 +
 arch/s390/mm/pgtable.c          | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index e88e9f6b07cc..51b002b5667e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1747,6 +1747,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern void s390_enable_skey(void);
 
 /*
  * No page table caches to initialise
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index be80f55a1b78..02a8607bbeb5 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1378,6 +1378,29 @@ int s390_enable_sie(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+void s390_enable_skey(void)
+{
+	/*
+	 * To avoid races between multiple vcpus, ending in calling
+	 * page_table_reset twice or more,
+	 * the page_table_lock is taken for serialization.
+	 */
+	spin_lock(&current->mm->page_table_lock);
+	if (mm_use_skey(current->mm)) {
+		spin_unlock(&current->mm->page_table_lock);
+		return;
+	}
+
+	current->mm->context.use_skey = 1;
+	spin_unlock(&current->mm->page_table_lock);
+	page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
 			   pmd_t *pmdp)

From 693ffc0802db41911ada95a3e77546f0ed1e7d00 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Tue, 14 Jan 2014 18:11:14 +0100
Subject: [PATCH 04/54] KVM: s390: Don't enable skeys by default

The first invocation of storage key operations on a given cpu will be intercepted.

On these intercepts we will enable storage keys for the guest and remove the
previously added intercepts.
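
The resulting life cycle, condensed for illustration from the hunks
below:

/* vcpu setup: intercept all storage key instructions */
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

/* first ISKE/SSKE/RRBE intercept: enable keys once, then run natively */
s390_enable_skey();
vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);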

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h    |  3 +++
 arch/s390/include/asm/mmu_context.h |  2 +-
 arch/s390/kvm/kvm-s390.c            |  1 +
 arch/s390/kvm/priv.c                | 14 ++++++++++++++
 arch/s390/kvm/trace.h               | 14 ++++++++++++++
 5 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 154b60089be9..a993b6f3429f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -89,6 +89,9 @@ struct kvm_s390_sie_block {
 	__u16   lctl;			/* 0x0044 */
 	__s16	icpua;			/* 0x0046 */
 #define ICTL_LPSW 0x00400000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
 	__u32	ictl;			/* 0x0048 */
 	__u32	eca;			/* 0x004c */
 	__u8	icptcode;		/* 0x0050 */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 05925ead0748..d42fb1b728d8 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,7 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
 	mm->context.has_pgste = 0;
-	mm->context.use_skey = 1;
+	mm->context.use_skey = 0;
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b3ecb8f5b6ce..b767ec97368a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -465,6 +465,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->ecb2  = 8;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
+	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 	if (kvm_enabled_cmma()) {
 		cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
 		if (cbrl) {
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 476e9e218f43..8a63e992936b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -147,8 +147,21 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static void __skey_check_enable(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
+		return;
+
+	s390_enable_skey();
+	trace_kvm_s390_skey_related_inst(vcpu);
+	vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+
 static int handle_skey(struct kvm_vcpu *vcpu)
 {
+	__skey_check_enable(vcpu);
+
 	vcpu->stat.instruction_storage_key++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -618,6 +631,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		}
 
 		if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+			__skey_check_enable(vcpu);
 			if (set_guest_storage_key(current->mm, useraddr,
 					vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
 					vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index e8e7213d4cc5..a4bf7d78a0db 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -30,6 +30,20 @@
 	TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,		\
 		  __entry->pswmask, __entry->pswaddr, p_args)
 
+TRACE_EVENT(kvm_s390_skey_related_inst,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+	    VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+	);
+
 TRACE_EVENT(kvm_s390_major_guest_pfault,
 	    TP_PROTO(VCPU_PROTO_COMMON),
 	    TP_ARGS(VCPU_ARGS_COMMON),

From 0a61b222df75a6a69dc34816f7db2f61fee8c935 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 18 Oct 2013 12:03:41 +0200
Subject: [PATCH 05/54] KVM: s390/mm: use software dirty bit detection for user
 dirty tracking

Switch the user dirty bit detection used for migration from the
hardware-provided host change-bit in the pgste to a fault-based
detection method. This reduces the host's dependency on the storage
key to the point where it becomes possible to enable the RCP bypass
for KVM guests.

The fault-based dirty detection will only indicate changes caused
by accesses via the guest address space. The hardware-based method
can detect all changes, even those caused by I/O or accesses via the
kernel page table. The KVM/qemu code needs to take this into account.
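
The cycle, condensed for illustration: whenever a pte that allows write
access is established, PGSTE_UC_BIT is set in the pgste; the dirty log
code then tests and clears the bit and protects the pte again, so the
next guest write faults and re-arms it:

/* pgste_set_pte(): pte allows write access -> mark user-dirty */
pgste_val(pgste) |= PGSTE_UC_BIT;

/* ptep_test_and_clear_user_dirty(): harvest and re-arm */
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT;
/* ... then write-protect or invalidate the pte so the next write faults */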

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 135 +++++++++++++-------------------
 arch/s390/mm/pgtable.c          |   6 +-
 2 files changed, 59 insertions(+), 82 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 51b002b5667e..b2c630df0ca5 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT	0x00200000UL
 #define PGSTE_GR_BIT	0x00040000UL
 #define PGSTE_GC_BIT	0x00020000UL
-#define PGSTE_IN_BIT	0x00008000UL	/* IPTE notify bit */
+#define PGSTE_UC_BIT	0x00008000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x00004000UL	/* IPTE notify bit */
 
 #else /* CONFIG_64BIT */
 
@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT	0x0020000000000000UL
 #define PGSTE_GR_BIT	0x0004000000000000UL
 #define PGSTE_GC_BIT	0x0002000000000000UL
-#define PGSTE_IN_BIT	0x0000800000000000UL	/* IPTE notify bit */
+#define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */
 
 #endif /* CONFIG_64BIT */
 
@@ -720,16 +722,6 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
 	address = pte_val(*ptep) & PAGE_MASK;
 	skey = (unsigned long) page_get_storage_key(address);
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
-	if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
-		/* Transfer dirty + referenced bit to host bits in pgste */
-		pgste_val(pgste) |= bits << 52;
-		page_set_storage_key(address, skey ^ bits, 0);
-	} else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
-		   (bits & _PAGE_REFERENCED)) {
-		/* Transfer referenced bit to host bit in pgste */
-		pgste_val(pgste) |= PGSTE_HR_BIT;
-		page_reset_referenced(address);
-	}
 	/* Transfer page changed & referenced bit to guest bits in pgste */
 	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
 	/* Copy page access key and fetch protection bit to pgste */
@@ -740,19 +732,6 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
 
 }
 
-static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste,
-					 struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
-	if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
-		return pgste;
-	/* Get referenced bit from storage key */
-	if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
-		pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
-#endif
-	return pgste;
-}
-
 static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
 				 struct mm_struct *mm)
 {
@@ -770,23 +749,30 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
 	 * key C/R to 0.
 	 */
 	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
 	page_set_storage_key(address, nkey, 0);
 #endif
 }
 
-static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 {
-	if (!MACHINE_HAS_ESOP &&
-	    (pte_val(entry) & _PAGE_PRESENT) &&
-	    (pte_val(entry) & _PAGE_WRITE)) {
-		/*
-		 * Without enhanced suppression-on-protection force
-		 * the dirty bit on for all writable ptes.
-		 */
-		pte_val(entry) |= _PAGE_DIRTY;
-		pte_val(entry) &= ~_PAGE_PROTECT;
+	if ((pte_val(entry) & _PAGE_PRESENT) &&
+	    (pte_val(entry) & _PAGE_WRITE) &&
+	    !(pte_val(entry) & _PAGE_INVALID)) {
+		if (!MACHINE_HAS_ESOP) {
+			/*
+			 * Without enhanced suppression-on-protection force
+			 * the dirty bit on for all writable ptes.
+			 */
+			pte_val(entry) |= _PAGE_DIRTY;
+			pte_val(entry) &= ~_PAGE_PROTECT;
+		}
+		if (!(pte_val(entry) & _PAGE_PROTECT))
+			/* This pte allows write access, set user-dirty */
+			pgste_val(pgste) |= PGSTE_UC_BIT;
 	}
 	*ptep = entry;
+	return pgste;
 }
 
 /**
@@ -884,7 +870,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 		pgste = pgste_get_lock(ptep);
 		pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
 		pgste_set_key(ptep, pgste, entry, mm);
-		pgste_set_pte(ptep, entry);
+		pgste = pgste_set_pte(ptep, pgste, entry);
 		pgste_set_unlock(ptep, pgste);
 	} else {
 		if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
@@ -1030,45 +1016,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
 }
 #endif
 
-/*
- * Get (and clear) the user dirty bit for a pte.
- */
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
-						 pte_t *ptep)
-{
-	pgste_t pgste;
-	int dirty = 0;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_update_all(ptep, pgste, mm);
-		dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
-		pgste_val(pgste) &= ~PGSTE_HC_BIT;
-		pgste_set_unlock(ptep, pgste);
-		return dirty;
-	}
-	return dirty;
-}
-
-/*
- * Get (and clear) the user referenced bit for a pte.
- */
-static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
-						 pte_t *ptep)
-{
-	pgste_t pgste;
-	int young = 0;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_update_young(ptep, pgste, mm);
-		young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
-		pgste_val(pgste) &= ~PGSTE_HR_BIT;
-		pgste_set_unlock(ptep, pgste);
-	}
-	return young;
-}
-
 static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
 {
 	unsigned long pto = (unsigned long) ptep;
@@ -1131,6 +1078,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
 	atomic_sub(0x10000, &mm->context.attach_count);
 }
 
+/*
+ * Get (and clear) the user dirty bit for a pte.
+ */
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
+						 unsigned long addr,
+						 pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+	int dirty;
+
+	if (!mm_has_pgste(mm))
+		return 0;
+	pgste = pgste_get_lock(ptep);
+	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+	pgste_val(pgste) &= ~PGSTE_UC_BIT;
+	pte = *ptep;
+	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+		pgste = pgste_ipte_notify(mm, ptep, pgste);
+		__ptep_ipte(addr, ptep);
+		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+			pte_val(pte) |= _PAGE_PROTECT;
+		else
+			pte_val(pte) |= _PAGE_INVALID;
+		*ptep = pte;
+	}
+	pgste_set_unlock(ptep, pgste);
+	return dirty;
+}
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
@@ -1150,7 +1127,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	pte = pte_mkold(pte);
 
 	if (mm_has_pgste(vma->vm_mm)) {
-		pgste_set_pte(ptep, pte);
+		pgste = pgste_set_pte(ptep, pgste, pte);
 		pgste_set_unlock(ptep, pgste);
 	} else
 		*ptep = pte;
@@ -1233,7 +1210,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get(ptep);
 		pgste_set_key(ptep, pgste, pte, mm);
-		pgste_set_pte(ptep, pte);
+		pgste = pgste_set_pte(ptep, pgste, pte);
 		pgste_set_unlock(ptep, pgste);
 	} else
 		*ptep = pte;
@@ -1314,7 +1291,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
 		pte = pte_wrprotect(pte);
 
 		if (mm_has_pgste(mm)) {
-			pgste_set_pte(ptep, pte);
+			pgste = pgste_set_pte(ptep, pgste, pte);
 			pgste_set_unlock(ptep, pgste);
 		} else
 			*ptep = pte;
@@ -1339,7 +1316,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 	ptep_flush_direct(vma->vm_mm, address, ptep);
 
 	if (mm_has_pgste(vma->vm_mm)) {
-		pgste_set_pte(ptep, entry);
+		pgste = pgste_set_pte(ptep, pgste, entry);
 		pgste_set_unlock(ptep, pgste);
 	} else
 		*ptep = entry;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 02a8607bbeb5..1ddf975352a0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -832,6 +832,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
 	}
 	spin_unlock(&gmap_notifier_lock);
 }
+EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
 static inline int page_table_with_pgste(struct page *page)
 {
@@ -864,8 +865,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 	atomic_set(&page->_mapcount, 0);
 	table = (unsigned long *) page_to_phys(page);
 	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-	clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
-		    PAGE_SIZE/2);
+	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
 	return table;
 }
 
@@ -1005,7 +1005,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	/* changing the guest storage key is considered a change of the page */
 	if ((pgste_val(new) ^ pgste_val(old)) &
 	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
-		pgste_val(new) |= PGSTE_HC_BIT;
+		pgste_val(new) |= PGSTE_UC_BIT;
 
 	pgste_set_unlock(ptep, new);
 	pte_unmap_unlock(*ptep, ptl);

From a0bf4f149bbfa2e31b5f4172c817afdb7b986733 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Mon, 24 Mar 2014 14:27:58 +0100
Subject: [PATCH 06/54] KVM: s390/mm: new gmap_test_and_clear_dirty function

For live migration kvm needs to test and clear the dirty bit of guest pages.

For that we use ptep_test_and_clear_user_dirty; to be sure we are not
racing with other code, we protect the pte. This needs to be done
within the architecture memory management code.
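
A typical use, as wired up by the next patch for migration (address and
gfn as in that patch's loop; the caller holds mmap_sem):

down_read(&gmap->mm->mmap_sem);
if (gmap_test_and_clear_dirty(address, gmap))
	mark_page_dirty(kvm, gfn);
up_read(&gmap->mm->mmap_sem);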

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h |  2 ++
 arch/s390/mm/pgtable.c          | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b2c630df0ca5..fcba5e03839f 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -838,6 +838,8 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *);
 unsigned long gmap_fault(unsigned long address, struct gmap *);
 void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 void __gmap_zap(unsigned long address, struct gmap *);
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
+
 
 void gmap_register_ipte_notifier(struct gmap_notifier *);
 void gmap_unregister_ipte_notifier(struct gmap_notifier *);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 1ddf975352a0..ea4a31b95990 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1401,6 +1401,27 @@ void s390_enable_skey(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_skey);
 
+/*
+ * Test and reset if a guest page is dirty
+ */
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+	bool dirty = false;
+
+	pte = get_locked_pte(gmap->mm, address, &ptl);
+	if (unlikely(!pte))
+		return false;
+
+	if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
+		dirty = true;
+
+	spin_unlock(ptl);
+	return dirty;
+}
+EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
 			   pmd_t *pmdp)

From 15f36ebd34b5b296c274ef67024f14c2d394a507 Mon Sep 17 00:00:00 2001
From: "Jason J. Herne" <jjherne@us.ibm.com>
Date: Thu, 2 Aug 2012 10:10:17 -0400
Subject: [PATCH 07/54] KVM: s390: Add proper dirty bitmap support to S390 kvm.

Replace the kvm_s390_sync_dirty_log() stub with code to construct the KVM
dirty_bitmap from S390 memory change bits. Also add code to properly
clear the dirty_bitmap when clearing the bitmap.
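
With this in place, the generic dirty log interface works on s390 as on
other architectures; a minimal userspace sketch (vm_fd, slot_id and
bitmap are placeholders, error handling omitted):

struct kvm_dirty_log log = {
	.slot = slot_id,
	.dirty_bitmap = bitmap,	/* one bit per page of the memslot */
};

if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) == 0)
	/* bitmap now holds the pages dirtied since the last call */;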

Signed-off-by: Jason J. Herne <jjherne@us.ibm.com>
CC: Dominik Dingel <dingel@linux.vnet.ibm.com>
[Dominik Dingel: use gmap_test_and_clear_dirty, locking fixes]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 51 +++++++++++++++++++++++++++++++++++++++-
 virt/kvm/kvm_main.c      |  2 --
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b767ec97368a..346a3478dd00 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -11,6 +11,7 @@
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ *               Jason J. Herne <jjherne@us.ibm.com>
  */
 
 #include <linux/compiler.h>
@@ -179,6 +180,25 @@ int kvm_dev_ioctl_check_extension(long ext)
 	return r;
 }
 
+static void kvm_s390_sync_dirty_log(struct kvm *kvm,
+					struct kvm_memory_slot *memslot)
+{
+	gfn_t cur_gfn, last_gfn;
+	unsigned long address;
+	struct gmap *gmap = kvm->arch.gmap;
+
+	down_read(&gmap->mm->mmap_sem);
+	/* Loop over all guest pages */
+	last_gfn = memslot->base_gfn + memslot->npages;
+	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
+		address = gfn_to_hva_memslot(memslot, cur_gfn);
+
+		if (gmap_test_and_clear_dirty(address, gmap))
+			mark_page_dirty(kvm, cur_gfn);
+	}
+	up_read(&gmap->mm->mmap_sem);
+}
+
 /* Section: vm related */
 /*
  * Get (and clear) the dirty memory log for a memory slot.
@@ -186,7 +206,36 @@ int kvm_dev_ioctl_check_extension(long ext)
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 			       struct kvm_dirty_log *log)
 {
-	return 0;
+	int r;
+	unsigned long n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty = 0;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	kvm_s390_sync_dirty_log(kvm, memslot);
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* Clear the dirty log */
+	if (is_dirty) {
+		n = kvm_dirty_bitmap_bytes(memslot);
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
 }
 
 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 56baae8c2f56..7facdb1f1374 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -637,14 +637,12 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
  */
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
-#ifndef CONFIG_S390
 	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
 
 	memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
 	if (!memslot->dirty_bitmap)
 		return -ENOMEM;
 
-#endif /* !CONFIG_S390 */
 	return 0;
 }
 

From f2061656209fb9a5d54bbb1999f0a633438504e7 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Wed, 9 Apr 2014 13:13:00 +0200
Subject: [PATCH 08/54] KVM: s390: Per-vm kvm device controls

We sometimes need to get/set attributes specific to a virtual machine
and so need something other than ONE_REG.

Let's copy the KVM_DEVICE approach, and define the respective ioctls
for the vm file descriptor.
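
From userspace this mirrors the existing device control API, just on
the vm file descriptor; a hedged sketch (vm_fd is a placeholder, the
zeroed group/attr stand in for architecture specific values):

struct kvm_device_attr attr = {
	.group = 0,	/* an architecture specific group */
	.attr  = 0,	/* an attribute within that group */
};

/* probe first, then get/set */
if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);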

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt        |  8 ++--
 Documentation/virtual/kvm/devices/vm.txt | 10 +++++
 arch/s390/kvm/kvm-s390.c                 | 54 ++++++++++++++++++++++++
 include/uapi/linux/kvm.h                 |  1 +
 4 files changed, 69 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/vm.txt

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a9380ba54c8e..2014ff12b492 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2314,8 +2314,8 @@ struct kvm_create_device {
 
 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL
-Type: device ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
@@ -2340,8 +2340,8 @@ struct kvm_device_attr {
 
 4.81 KVM_HAS_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL
-Type: device ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
new file mode 100644
index 000000000000..562bee6e600b
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -0,0 +1,10 @@
+Generic vm interface
+====================================
+
+The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
+struct kvm_device_attr as other devices, but targets VM-wide settings
+and controls.
+
+The groups and attributes per virtual machine, if any, are architecture
+specific.
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 346a3478dd00..c335a2efa5de 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -162,6 +162,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_ENABLE_CAP_VM:
+	case KVM_CAP_VM_ATTRIBUTES:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -257,11 +258,43 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 	return r;
 }
 
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
 	struct kvm *kvm = filp->private_data;
 	void __user *argp = (void __user *)arg;
+	struct kvm_device_attr attr;
 	int r;
 
 	switch (ioctl) {
@@ -294,6 +327,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 		break;
 	}
+	case KVM_SET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_set_attr(kvm, &attr);
+		break;
+	}
+	case KVM_GET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_get_attr(kvm, &attr);
+		break;
+	}
+	case KVM_HAS_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_has_attr(kvm, &attr);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a8f4ee5d2e82..90acfe4966e7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -743,6 +743,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
 #define KVM_CAP_ENABLE_CAP_VM 98
 #define KVM_CAP_S390_IRQCHIP 99
+#define KVM_CAP_VM_ATTRIBUTES 100
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From b31605c12f4edda7a40ad9f86985739666298c31 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Tue, 25 Mar 2014 13:47:11 +0100
Subject: [PATCH 09/54] KVM: s390: make cmma usage conditional

When userspace resets the guest without notifying kvm, the stale CMMA
state might still mark pages as unused, resulting in guest data
corruption.
To avoid this, CMMA must be enabled only if userspace understands
the implications.

CMMA must be enabled before vCPU creation. It can't be switched off
once enabled.  All subsequently created vCPUs will be enabled for
CMMA according to the CMMA state of the VM.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
[remove now unnecessary calls to page_table_reset_pgste]
---
 arch/s390/include/asm/kvm_host.h |  1 +
 arch/s390/kvm/diag.c             |  6 ----
 arch/s390/kvm/kvm-s390.c         | 59 ++++++++++++++++++++------------
 arch/s390/kvm/kvm-s390.h         |  7 ++--
 arch/s390/kvm/priv.c             |  2 +-
 5 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a993b6f3429f..b61ac418b510 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -288,6 +288,7 @@ struct kvm_arch{
 	struct gmap *gmap;
 	int css_support;
 	int use_irqchip;
+	int use_cmma;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 };
 
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 44dcfa8860b5..ff768f1dd337 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -167,17 +167,11 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 
 	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
 	switch (subcode) {
-	case 0:
-	case 1:
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE, false);
-		return -EOPNOTSUPP;
 	case 3:
 		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE, false);
 		break;
 	case 4:
 		vcpu->run->s390_reset_flags = 0;
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE, false);
 		break;
 	default:
 		return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c335a2efa5de..fc2fe49488c0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -437,9 +437,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	if (kvm_is_ucontrol(vcpu->kvm))
 		gmap_free(vcpu->arch.gmap);
 
-	if (vcpu->arch.sie_block->cbrlo)
-		__free_page(__pfn_to_page(
-				vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
+	if (kvm_s390_cmma_enabled(vcpu->kvm))
+		kvm_s390_vcpu_unsetup_cmma(vcpu);
 	free_page((unsigned long)(vcpu->arch.sie_block));
 
 	kvm_vcpu_uninit(vcpu);
@@ -553,9 +552,26 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+	free_page(vcpu->arch.sie_block->cbrlo);
+	vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+	if (!vcpu->arch.sie_block->cbrlo)
+		return -ENOMEM;
+
+	vcpu->arch.sie_block->ecb2 |= 0x80;
+	vcpu->arch.sie_block->ecb2 &= ~0x08;
+	return 0;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	struct page *cbrl;
+	int rc = 0;
 
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
 						    CPUSTAT_SM |
@@ -569,13 +585,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
-	if (kvm_enabled_cmma()) {
-		cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
-		if (cbrl) {
-			vcpu->arch.sie_block->ecb2 |= 0x80;
-			vcpu->arch.sie_block->ecb2 &= ~0x08;
-			vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
-		}
+	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+		rc = kvm_s390_vcpu_setup_cmma(vcpu);
+		if (rc)
+			return rc;
 	}
 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
@@ -583,7 +596,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 	get_cpu_id(&vcpu->arch.cpu_id);
 	vcpu->arch.cpu_id.version = 0xff;
-	return 0;
+	return rc;
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -890,6 +903,18 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }
 
+bool kvm_s390_cmma_enabled(struct kvm *kvm)
+{
+	if (!MACHINE_IS_LPAR)
+		return false;
+	/* only enable for z10 and later */
+	if (!MACHINE_HAS_EDAT1)
+		return false;
+	if (!kvm->arch.use_cmma)
+		return false;
+	return true;
+}
+
 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -1072,16 +1097,6 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 	return rc;
 }
 
-bool kvm_enabled_cmma(void)
-{
-	if (!MACHINE_IS_LPAR)
-		return false;
-	/* only enable for z10 and later */
-	if (!MACHINE_HAS_EDAT1)
-		return false;
-	return true;
-}
-
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int rc, exit_reason;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3c1e2274d9ea..460ccd828bbb 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -158,8 +158,11 @@ void s390_vcpu_block(struct kvm_vcpu *vcpu);
 void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
 void exit_sie(struct kvm_vcpu *vcpu);
 void exit_sie_sync(struct kvm_vcpu *vcpu);
-/* are we going to support cmma? */
-bool kvm_enabled_cmma(void);
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
+/* is cmma enabled */
+bool kvm_s390_cmma_enabled(struct kvm *kvm);
+
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8a63e992936b..9a04d74c5fb4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -656,7 +656,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
 	gmap = vcpu->arch.gmap;
 	vcpu->stat.instruction_essa++;
-	if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo)
+	if (!kvm_s390_cmma_enabled(vcpu->kvm))
 		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)

From 4f718eab262868aeab83774ec4ce25e6a9906657 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Wed, 9 Apr 2014 13:13:00 +0200
Subject: [PATCH 10/54] KVM: s390: Exploiting generic userspace interface for
 cmma

To enable CMMA and to reset its state we use the vm kvm_device ioctls,
encapsulating attributes within the KVM_S390_VM_MEM_CTRL group.
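
Userspace enables CMMA before creating any vcpu, e.g. (sketch, vm_fd is
a placeholder for the vm file descriptor):

struct kvm_device_attr attr = {
	.group = KVM_S390_VM_MEM_CTRL,
	.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
};

/* must happen before the first KVM_CREATE_VCPU, else -EBUSY */
ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);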

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/devices/vm.txt | 16 +++++++++
 arch/s390/include/uapi/asm/kvm.h         |  7 ++++
 arch/s390/kvm/kvm-s390.c                 | 43 ++++++++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 562bee6e600b..0d16f96c0eac 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -8,3 +8,19 @@ and controls.
 
 The groups and attributes per virtual machine, if any, are architecture
 specific.
+
+1. GROUP: KVM_S390_VM_MEM_CTRL
+Architectures: s390
+
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
+Parameters: none
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
+
+Enables CMMA for the virtual machine.
+
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
+Returns: 0
+
+Clear the CMMA status for all guest pages, so any pages the guest marked
+as unused are again in use and may not be reclaimed by the host.
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index c003c6a73b1e..e35c79821d29 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -54,6 +54,13 @@ struct kvm_s390_io_adapter_req {
 	__u64 addr;
 };
 
+/* kvm attr_group  on vm fd */
+#define KVM_S390_VM_MEM_CTRL		0
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA	0
+#define KVM_S390_VM_MEM_CLR_CMMA	1
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fc2fe49488c0..fe2396cc0856 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -258,11 +258,43 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 	return r;
 }
 
+static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+	unsigned int idx;
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		ret = -EBUSY;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) == 0) {
+			kvm->arch.use_cmma = 1;
+			ret = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	case KVM_S390_VM_MEM_CLR_CMMA:
+		mutex_lock(&kvm->lock);
+		idx = srcu_read_lock(&kvm->srcu);
+		page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
+		srcu_read_unlock(&kvm->srcu, idx);
+		mutex_unlock(&kvm->lock);
+		ret = 0;
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	int ret;
 
 	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		ret = kvm_s390_mem_control(kvm, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -281,6 +313,17 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	int ret;
 
 	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		switch (attr->attr) {
+		case KVM_S390_VM_MEM_ENABLE_CMMA:
+		case KVM_S390_VM_MEM_CLR_CMMA:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
 	default:
 		ret = -ENXIO;
 		break;

From bcd846837c43ccccc932036d5a634bee48f1ff4f Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Tue, 11 Feb 2014 11:07:05 +0100
Subject: [PATCH 11/54] KVM: s390: allow injecting every kind of interrupt

Add a new data structure and function that allow us to inject
all kinds of interrupts as defined in the PoP.
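
In-kernel users can then pass the complete program interrupt
information instead of just the code, e.g. (illustration):

struct kvm_s390_pgm_info pgm_info = {
	.code = PGM_ADDRESSING,
};

rc = kvm_s390_inject_prog_irq(vcpu, &pgm_info);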

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 59 ++++++++++++++++++++++++++++----
 arch/s390/kvm/interrupt.c        | 25 ++++++++++++++
 arch/s390/kvm/kvm-s390.h         |  3 ++
 3 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b61ac418b510..90905ca93725 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -186,13 +186,58 @@ struct kvm_vcpu_stat {
 	u32 diagnose_9c;
 };
 
-#define PGM_OPERATION            0x01
-#define PGM_PRIVILEGED_OP	 0x02
-#define PGM_EXECUTE              0x03
-#define PGM_PROTECTION           0x04
-#define PGM_ADDRESSING           0x05
-#define PGM_SPECIFICATION        0x06
-#define PGM_DATA                 0x07
+#define PGM_OPERATION			0x01
+#define PGM_PRIVILEGED_OP		0x02
+#define PGM_EXECUTE			0x03
+#define PGM_PROTECTION			0x04
+#define PGM_ADDRESSING			0x05
+#define PGM_SPECIFICATION		0x06
+#define PGM_DATA			0x07
+#define PGM_FIXED_POINT_OVERFLOW	0x08
+#define PGM_FIXED_POINT_DIVIDE		0x09
+#define PGM_DECIMAL_OVERFLOW		0x0a
+#define PGM_DECIMAL_DIVIDE		0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW	0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW	0x0d
+#define PGM_HFP_SIGNIFICANCE		0x0e
+#define PGM_HFP_DIVIDE			0x0f
+#define PGM_SEGMENT_TRANSLATION		0x10
+#define PGM_PAGE_TRANSLATION		0x11
+#define PGM_TRANSLATION_SPEC		0x12
+#define PGM_SPECIAL_OPERATION		0x13
+#define PGM_OPERAND			0x15
+#define PGM_TRACE_TABEL			0x16
+#define PGM_SPACE_SWITCH		0x1c
+#define PGM_HFP_SQUARE_ROOT		0x1d
+#define PGM_PC_TRANSLATION_SPEC		0x1f
+#define PGM_AFX_TRANSLATION		0x20
+#define PGM_ASX_TRANSLATION		0x21
+#define PGM_LX_TRANSLATION		0x22
+#define PGM_EX_TRANSLATION		0x23
+#define PGM_PRIMARY_AUTHORITY		0x24
+#define PGM_SECONDARY_AUTHORITY		0x25
+#define PGM_LFX_TRANSLATION		0x26
+#define PGM_LSX_TRANSLATION		0x27
+#define PGM_ALET_SPECIFICATION		0x28
+#define PGM_ALEN_TRANSLATION		0x29
+#define PGM_ALE_SEQUENCE		0x2a
+#define PGM_ASTE_VALIDITY		0x2b
+#define PGM_ASTE_SEQUENCE		0x2c
+#define PGM_EXTENDED_AUTHORITY		0x2d
+#define PGM_LSTE_SEQUENCE		0x2e
+#define PGM_ASTE_INSTANCE		0x2f
+#define PGM_STACK_FULL			0x30
+#define PGM_STACK_EMPTY			0x31
+#define PGM_STACK_SPECIFICATION		0x32
+#define PGM_STACK_TYPE			0x33
+#define PGM_STACK_OPERATION		0x34
+#define PGM_ASCE_TYPE			0x38
+#define PGM_REGION_FIRST_TRANS		0x39
+#define PGM_REGION_SECOND_TRANS		0x3a
+#define PGM_REGION_THIRD_TRANS		0x3b
+#define PGM_MONITOR			0x40
+#define PGM_PER				0x80
+#define PGM_CRYPTO_OPERATION		0x119
 
 struct kvm_s390_interrupt_info {
 	struct list_head list;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 200a8f9390b6..a44c68990cf4 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -660,6 +660,31 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	return 0;
 }
 
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+			     struct kvm_s390_pgm_info *pgm_info)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
+		   pgm_info->code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+				   pgm_info->code, 0, 1);
+
+	inti->type = KVM_S390_PROGRAM_INT;
+	memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
+	spin_lock_bh(&li->lock);
+	list_add(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	BUG_ON(waitqueue_active(li->wq));
+	spin_unlock_bh(&li->lock);
+	return 0;
+}
+
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid)
 {
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 460ccd828bbb..25a8b8478918 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -165,6 +165,9 @@ bool kvm_s390_cmma_enabled(struct kvm *kvm);
 
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+/* implemented in interrupt.c */
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+			     struct kvm_s390_pgm_info *pgm_info);
 
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);

From dfeec843fb237d73947e818f961e8d6f0df22b01 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:09:21 +0100
Subject: [PATCH 12/54] KVM: add kvm_is_error_gpa() helper

It's quite common (in the s390 guest access code) to test if a guest
physical address points to a valid guest memory area or not.
So add a simple helper function in common code, since this might be
of interest for other architectures as well.
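
Typical use in the upcoming guest access code, for illustration:

if (kvm_is_error_gpa(vcpu->kvm, gpa))
	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);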

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/kvm_host.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9f4380..471d1400c4ac 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -879,6 +879,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
+static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+{
+	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+	return kvm_is_error_hva(hva);
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);

From 280ef0f1f982e4bd9a22ee3627ca06dccde59a64 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 17 Dec 2013 09:08:28 +0100
Subject: [PATCH 13/54] KVM: s390: export test_vfacility()

Make test_vfacility() available for other files. This is needed for the
new guest access functions, which test if certain facilities are available
for a guest.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 2 +-
 arch/s390/kvm/kvm-s390.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fe2396cc0856..4b1df682e5c3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -91,7 +91,7 @@ unsigned long *vfacilities;
 static struct gmap_notifier gmap_notifier;
 
 /* test availability of vfacility */
-static inline int test_vfacility(unsigned long nr)
+int test_vfacility(unsigned long nr)
 {
 	return __test_facility(nr, (void *) vfacilities);
 }
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 25a8b8478918..f3ab9fcad496 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -162,6 +162,7 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
 /* is cmma enabled */
 bool kvm_s390_cmma_enabled(struct kvm *kvm);
+int test_vfacility(unsigned long nr);
 
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);

From 1365632bdeb42e5e252566bb08c0d7d68640edd3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:08:06 +0100
Subject: [PATCH 14/54] s390/ptrace: add struct psw and accessor function

Introduce a 'struct psw_bits' which makes it easier to decode and test
whether certain bits in a psw are set.
In addition also add a 'psw_bits()' helper define which allows one to
directly modify and test a psw_t structure. E.g.

psw_t psw;
psw_bits(psw).t = 1; /* set dat bit */
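
Testing bits works the same way, e.g.:

int dat = psw_bits(psw).t; /* read dat bit */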

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/ptrace.h | 44 ++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f4783c0b7b43..6e7708f3d866 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -16,6 +16,50 @@
 			 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
 			 PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
 
+struct psw_bits {
+	unsigned long long	: 1;
+	unsigned long long r	: 1; /* PER-Mask */
+	unsigned long long	: 3;
+	unsigned long long t	: 1; /* DAT Mode */
+	unsigned long long i	: 1; /* Input/Output Mask */
+	unsigned long long e	: 1; /* External Mask */
+	unsigned long long key	: 4; /* PSW Key */
+	unsigned long long	: 1;
+	unsigned long long m	: 1; /* Machine-Check Mask */
+	unsigned long long w	: 1; /* Wait State */
+	unsigned long long p	: 1; /* Problem State */
+	unsigned long long as	: 2; /* Address Space Control */
+	unsigned long long cc	: 2; /* Condition Code */
+	unsigned long long pm	: 4; /* Program Mask */
+	unsigned long long ri	: 1; /* Runtime Instrumentation */
+	unsigned long long	: 6;
+	unsigned long long eaba : 2; /* Addressing Mode */
+#ifdef CONFIG_64BIT
+	unsigned long long	: 31;
+	unsigned long long ia	: 64;/* Instruction Address */
+#else
+	unsigned long long ia	: 31;/* Instruction Address */
+#endif
+};
+
+enum {
+	PSW_AMODE_24BIT = 0,
+	PSW_AMODE_31BIT = 1,
+	PSW_AMODE_64BIT = 3
+};
+
+enum {
+	PSW_AS_PRIMARY	 = 0,
+	PSW_AS_ACCREG	 = 1,
+	PSW_AS_SECONDARY = 2,
+	PSW_AS_HOME	 = 3
+};
+
+#define psw_bits(__psw) (*({			\
+	typecheck(psw_t, __psw);		\
+	&(*(struct psw_bits *)(&(__psw)));	\
+}))
+
 /*
  * The pt_regs struct defines the way the registers are stored on
  * the stack during a system call.

From 5f4e87a227757198f4cf0495f7fd6c47a28aacb6 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:08:37 +0100
Subject: [PATCH 15/54] s390/ctl_reg: add union type for control register 0

Add 'union ctlreg0_bits' to easily allow setting and testing bits of
control register 0 bits.
This patch only adds the bits needed for the new guest access functions.
Other bits and control registers can be added when needed.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/ctl_reg.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 4e63f1a13600..31ab9f346d7e 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -57,6 +57,20 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
+union ctlreg0 {
+	unsigned long val;
+	struct {
+#ifdef CONFIG_64BIT
+		unsigned long	   : 32;
+#endif
+		unsigned long	   : 3;
+		unsigned long lap  : 1; /* Low-address-protection control */
+		unsigned long	   : 4;
+		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+		unsigned long	   : 23;
+	};
+};
+
 #ifdef CONFIG_SMP
 # define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
 # define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)

From 072c9878eedc2f1ef2f0d19ac7680beeee97bf05 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:21:47 +0100
Subject: [PATCH 16/54] KVM: s390: add kvm_s390_logical_to_effective() helper

Add kvm_s390_logical_to_effective() helper which converts a guest vcpu's
logical storage address to a guest vcpu effective address by applying the
rules of the vcpu's addressing mode defined by PSW bits 31 and 32
(extended and basic addressing mode).
Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
will be zeroed and the remaining bits will be returned.
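
For example (illustrative values), in the 24 bit addressing mode the
upper 40 bits are zeroed:

ga = kvm_s390_logical_to_effective(vcpu, 0x12345678);
/* ga == 0x345678 if psw_bits(*psw).eaba == PSW_AMODE_24BIT */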

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/gaccess.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 374a439ccc60..ae3cb638f220 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -30,6 +30,31 @@ static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
 	return gaddr;
 }
 
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+							  unsigned long ga)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+	if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+		return ga;
+	if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+		return ga & ((1UL << 31) - 1);
+	return ga & ((1UL << 24) - 1);
+}
+
 static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
 					  void __user *gptr,
 					  int prefixing)

From 1b0462e574f5238bb1ee811f014d629092c160cb Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:31:49 +0100
Subject: [PATCH 17/54] KVM: s390: add 'pgm' member to kvm_vcpu_arch and helper
 function

Add a 'struct kvm_s390_pgm_info pgm' member to kvm_vcpu_arch. This
structure will be used whenever, during instruction emulation in the
context of a vcpu, exception data needs to be stored somewhere.

Also add a helper function kvm_s390_inject_prog_cond() which can inject
the vcpu's last exception if needed.
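
The intended usage pattern, which is also documented at the function
itself, looks like:

rc = write_guest(vcpu, addr, data, len);
if (rc)
	return kvm_s390_inject_prog_cond(vcpu, rc);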

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  1 +
 arch/s390/kvm/kvm-s390.h         | 33 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 90905ca93725..c290d443d2c1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -286,6 +286,7 @@ struct kvm_vcpu_arch {
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct tasklet_struct tasklet;
+	struct kvm_s390_pgm_info pgm;
 	union  {
 		struct cpuid	cpu_id;
 		u64		stidp_data;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f3ab9fcad496..11ed0a596b5a 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -170,6 +170,39 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
 			     struct kvm_s390_pgm_info *pgm_info);
 
+/**
+ * kvm_s390_inject_prog_cond - conditionally inject a program check
+ * @vcpu: virtual cpu
+ * @rc: original return/error code
+ *
+ * This function is supposed to be used after regular guest access functions
+ * failed, to conditionally inject a program check to a vcpu. The typical
+ * pattern would look like
+ *
+ * rc = write_guest(vcpu, addr, data, len);
+ * if (rc)
+ *	return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * A negative return code from guest access functions implies an internal error
+ * like e.g. out of memory. In these cases no program check should be injected
+ * to the guest.
+ * A positive value implies that an exception happened while accessing a guest's
+ * memory. In this case all data belonging to the corresponding program check
+ * has been stored in vcpu->arch.pgm and can be injected with
+ * kvm_s390_inject_prog_irq().
+ *
+ * Returns: - the original @rc value if @rc was negative (internal error)
+ *	    - zero if @rc was already zero
+ *	    - zero or error code from injecting if @rc was positive
+ *	      (program check injected to @vcpu)
+ */
+static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
+{
+	if (rc <= 0)
+		return rc;
+	return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+}
+
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);

From d95fb12ff4d73e897126043bb5d03a068997a2ef Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:23:29 +0100
Subject: [PATCH 18/54] KVM: s390: add lowcore access functions

put_guest_lc, read_guest_lc and write_guest_lc are guest access
functions which shall only be used to access the lowcore of a vcpu.
These functions should be used for e.g. interrupt handlers where no
guest memory access protection facilities, like key or low address
protection, are applicable.

At a later point guest vcpu lowcore access should happen via pinned
prefix pages, so that these pages can be accessed directly via the
kernel mapping. All of these *_lc functions can be removed then.
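
A typical (illustrative) sequence in an interrupt delivery path:

rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
		     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));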

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/gaccess.h | 95 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 93 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index ae3cb638f220..917aeaa04fff 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,7 +1,7 @@
 /*
  * access guest memory
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -15,7 +15,8 @@
 
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
 #include "kvm-s390.h"
 
 /* Convert real to absolute address by applying the prefix of the CPU */
@@ -136,4 +137,94 @@ static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
 #define copy_from_guest_absolute(vcpu, to, from, size) \
 	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
 
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
+
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra)				\
+({								\
+	struct kvm_vcpu *__vcpu = (vcpu);			\
+	__typeof__(*(gra)) __x = (x);				\
+	unsigned long __gpa;					\
+								\
+	__gpa = (unsigned long)(gra);				\
+	__gpa += __vcpu->arch.sie_block->prefix;		\
+	kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x));	\
+})
+
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		   unsigned long len)
+{
+	unsigned long gpa = gra + vcpu->arch.sie_block->prefix;
+
+	return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		  unsigned long len)
+{
+	unsigned long gpa = gra + vcpu->arch.sie_block->prefix;
+
+	return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
 #endif /* __KVM_S390_GACCESS_H */

From 2293897805c2fea69e45aca31b3589d4590af89d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:26:52 +0100
Subject: [PATCH 19/54] KVM: s390: add architecture compliant guest access
 functions

The new guest memory access function write_guest() and read_guest() can be
used to access guest memory in an architecture compliant way.
These functions will look at the vcpu's PSW and select the correct address
space for memory access and also perform correct address wrap around.
In case DAT is turned on, page tables will be walked otherwise access will
happen to real or absolute memory.

Any access exception will be recognized and exception data will be stored
in the vcpu's kvm_vcpu_arch.pgm member. Subsequently an exception can be
injected if necessary.

Missing are:
- key protection checks
- access register mode support
- program event recording support

This patch also adds write_guest_real(), read_guest_real(),
write_guest_absolute() and read_guest_absolute() guest functions which can
be used to access real and absolute storage. These functions currently do
not perform any access checks, since there is no use case (yet?).
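
Callers of the real/absolute variants (illustrative sketch) only need
to check for -EFAULT, since no program check data is made available:

rc = read_guest_real(vcpu, gra, &data, sizeof(data));
if (rc)
	/* -EFAULT: broken guest mapping, nothing to inject */
	return rc;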

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/Makefile  |   4 +-
 arch/s390/kvm/gaccess.c | 536 ++++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/gaccess.h | 170 +++++++++++++
 3 files changed, 709 insertions(+), 1 deletion(-)
 create mode 100644 arch/s390/kvm/gaccess.c

diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index d3adb37e93a4..83a7a355befe 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -11,5 +11,7 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqch
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o
+
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 000000000000..916e1ee1f8c9
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,536 @@
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/pgtable.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+union asce {
+	unsigned long val;
+	struct {
+		unsigned long origin : 52; /* Region- or Segment-Table Origin */
+		unsigned long	 : 2;
+		unsigned long g  : 1; /* Subspace Group Control */
+		unsigned long p  : 1; /* Private Space Control */
+		unsigned long s  : 1; /* Storage-Alteration-Event Control */
+		unsigned long x  : 1; /* Space-Switch-Event Control */
+		unsigned long r  : 1; /* Real-Space Control */
+		unsigned long	 : 1;
+		unsigned long dt : 2; /* Designation-Type Control */
+		unsigned long tl : 2; /* Region- or Segment-Table Length */
+	};
+};
+
+enum {
+	ASCE_TYPE_SEGMENT = 0,
+	ASCE_TYPE_REGION3 = 1,
+	ASCE_TYPE_REGION2 = 2,
+	ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Second-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Second-Table Length */
+	};
+};
+
+union region2_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Third-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Third-Table Length */
+	};
+};
+
+struct region3_table_entry_fc0 {
+	unsigned long sto: 52;/* Segment-Table Origin */
+	unsigned long	 : 1;
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 1;
+	unsigned long tf : 2; /* Segment-Table Offset */
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long	 : 14;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long co : 1; /* Change-Recording Override */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union region3_table_entry {
+	unsigned long val;
+	struct region3_table_entry_fc0 fc0;
+	struct region3_table_entry_fc1 fc1;
+	struct {
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long cr : 1; /* Common-Region Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+struct segment_entry_fc0 {
+	unsigned long pto: 53;/* Page-Table Origin */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 3;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+struct segment_entry_fc1 {
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long	 : 3;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long co : 1; /* Change-Recording Override */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union segment_table_entry {
+	unsigned long val;
+	struct segment_entry_fc0 fc0;
+	struct segment_entry_fc1 fc1;
+	struct {
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Segment-Invalid Bit */
+		unsigned long cs : 1; /* Common-Segment Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+enum {
+	TABLE_TYPE_SEGMENT = 0,
+	TABLE_TYPE_REGION3 = 1,
+	TABLE_TYPE_REGION2 = 2,
+	TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long pfra : 52; /* Page-Frame Real Address */
+		unsigned long z  : 1; /* Zero Bit */
+		unsigned long i  : 1; /* Page-Invalid Bit */
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long co : 1; /* Change-Recording Override */
+		unsigned long	 : 8;
+	};
+};
+
+/*
+ * vaddress union that makes it easy to decode a virtual address into its
+ * region first index, region second index etc. parts.
+ */
+union vaddress {
+	unsigned long addr;
+	struct {
+		unsigned long rfx : 11;
+		unsigned long rsx : 11;
+		unsigned long rtx : 11;
+		unsigned long sx  : 11;
+		unsigned long px  : 8;
+		unsigned long bx  : 12;
+	};
+	struct {
+		unsigned long rfx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rsx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rtx01 : 2;
+		unsigned long	    : 9;
+		unsigned long sx01  : 2;
+		unsigned long	    : 29;
+	};
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members allow the
+ * value of a region, segment or page table entry to be assigned directly.
+ */
+union raddress {
+	unsigned long addr;
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+{
+	switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+	case PSW_AS_PRIMARY:
+		return vcpu->arch.sie_block->gcr[1];
+	case PSW_AS_SECONDARY:
+		return vcpu->arch.sie_block->gcr[7];
+	case PSW_AS_HOME:
+		return vcpu->arch.sie_block->gcr[13];
+	}
+	return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @write: indicates if access is a write access
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ *	    - a negative value if guest access failed due to e.g. broken
+ *	      guest mapping
+ *	    - a positive value if an access exception happened. In this case
+ *	      the returned value is the program interruption code as defined
+ *	      by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+				     unsigned long *gpa, int write)
+{
+	union vaddress vaddr = {.addr = gva};
+	union raddress raddr = {.addr = gva};
+	union page_table_entry pte;
+	int dat_protection = 0;
+	union ctlreg0 ctlreg0;
+	unsigned long ptr;
+	int edat1, edat2;
+	union asce asce;
+
+	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+	edat1 = ctlreg0.edat && test_vfacility(8);
+	edat2 = edat1 && test_vfacility(78);
+	asce.val = get_vcpu_asce(vcpu);
+	if (asce.r)
+		goto real_address;
+	ptr = asce.origin * 4096;
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:
+		if (vaddr.rfx01 > asce.tl)
+			return PGM_REGION_FIRST_TRANS;
+		ptr += vaddr.rfx * 8;
+		break;
+	case ASCE_TYPE_REGION2:
+		if (vaddr.rfx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rsx01 > asce.tl)
+			return PGM_REGION_SECOND_TRANS;
+		ptr += vaddr.rsx * 8;
+		break;
+	case ASCE_TYPE_REGION3:
+		if (vaddr.rfx || vaddr.rsx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rtx01 > asce.tl)
+			return PGM_REGION_THIRD_TRANS;
+		ptr += vaddr.rtx * 8;
+		break;
+	case ASCE_TYPE_SEGMENT:
+		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.sx01 > asce.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		ptr += vaddr.sx * 8;
+		break;
+	}
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:	{
+		union region1_table_entry rfte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rfte.val))
+			return -EFAULT;
+		if (rfte.i)
+			return PGM_REGION_FIRST_TRANS;
+		if (rfte.tt != TABLE_TYPE_REGION1)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+			return PGM_REGION_SECOND_TRANS;
+		if (edat1)
+			dat_protection |= rfte.p;
+		ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_REGION2: {
+		union region2_table_entry rste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rste.val))
+			return -EFAULT;
+		if (rste.i)
+			return PGM_REGION_SECOND_TRANS;
+		if (rste.tt != TABLE_TYPE_REGION2)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+			return PGM_REGION_THIRD_TRANS;
+		if (edat1)
+			dat_protection |= rste.p;
+		ptr = rste.rto * 4096 + vaddr.rtx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_REGION3: {
+		union region3_table_entry rtte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rtte.val))
+			return -EFAULT;
+		if (rtte.i)
+			return PGM_REGION_THIRD_TRANS;
+		if (rtte.tt != TABLE_TYPE_REGION3)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.cr && asce.p && edat2)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.fc && edat2) {
+			dat_protection |= rtte.fc1.p;
+			raddr.rfaa = rtte.fc1.rfaa;
+			goto absolute_address;
+		}
+		if (vaddr.sx01 < rtte.fc0.tf)
+			return PGM_SEGMENT_TRANSLATION;
+		if (vaddr.sx01 > rtte.fc0.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		if (edat1)
+			dat_protection |= rtte.fc0.p;
+		ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_SEGMENT: {
+		union segment_table_entry ste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &ste.val))
+			return -EFAULT;
+		if (ste.i)
+			return PGM_SEGMENT_TRANSLATION;
+		if (ste.tt != TABLE_TYPE_SEGMENT)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.cs && asce.p)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.fc && edat1) {
+			dat_protection |= ste.fc1.p;
+			raddr.sfaa = ste.fc1.sfaa;
+			goto absolute_address;
+		}
+		dat_protection |= ste.fc0.p;
+		ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+	}
+	}
+	if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		return PGM_ADDRESSING;
+	if (deref_table(vcpu->kvm, ptr, &pte.val))
+		return -EFAULT;
+	if (pte.i)
+		return PGM_PAGE_TRANSLATION;
+	if (pte.z)
+		return PGM_TRANSLATION_SPEC;
+	if (pte.co && !edat1)
+		return PGM_TRANSLATION_SPEC;
+	dat_protection |= pte.p;
+	raddr.pfra = pte.pfra;
+real_address:
+	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:
+	if (write && dat_protection)
+		return PGM_PROTECTION;
+	if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+		return PGM_ADDRESSING;
+	*gpa = raddr.addr;
+	return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+	/* Check for address ranges 0..511 and 4096..4607 */
+	return (ga & ~0x11fful) == 0;
+}
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+{
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	union asce asce;
+
+	if (!ctlreg0.lap)
+		return 0;
+	asce.val = get_vcpu_asce(vcpu);
+	if (psw_bits(*psw).t && asce.p)
+		return 0;
+	return 1;
+}
+
+struct trans_exc_code_bits {
+	unsigned long addr : 52; /* Translation-exception Address */
+	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+	unsigned long	   : 7;
+	unsigned long b61  : 1;
+	unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+	FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+	FSI_STORE   = 1, /* Exception was due to store operation */
+	FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+			    unsigned long *pages, unsigned long nr_pages,
+			    int write)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec_bits;
+	int lap_enabled, rc;
+
+	memset(pgm, 0, sizeof(*pgm));
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+	tec_bits->as = psw_bits(*psw).as;
+	lap_enabled = low_address_protection_enabled(vcpu);
+	while (nr_pages) {
+		ga = kvm_s390_logical_to_effective(vcpu, ga);
+		tec_bits->addr = ga >> PAGE_SHIFT;
+		if (write && lap_enabled && is_low_address(ga)) {
+			pgm->code = PGM_PROTECTION;
+			return pgm->code;
+		}
+		ga &= PAGE_MASK;
+		if (psw_bits(*psw).t) {
+			rc = guest_translate(vcpu, ga, pages, write);
+			if (rc < 0)
+				return rc;
+			if (rc == PGM_PROTECTION)
+				tec_bits->b61 = 1;
+			if (rc)
+				pgm->code = rc;
+		} else {
+			*pages = kvm_s390_real_to_abs(vcpu, ga);
+			if (kvm_is_error_gpa(vcpu->kvm, *pages))
+				pgm->code = PGM_ADDRESSING;
+		}
+		if (pgm->code)
+			return pgm->code;
+		ga += PAGE_SIZE;
+		pages++;
+		nr_pages--;
+	}
+	return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+		 unsigned long len, int write)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long _len, nr_pages, gpa, idx;
+	unsigned long pages_array[2];
+	unsigned long *pages;
+	int rc;
+
+	if (!len)
+		return 0;
+	/* Access register mode is not supported yet. */
+	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+		return -EOPNOTSUPP;
+	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+	pages = pages_array;
+	if (nr_pages > ARRAY_SIZE(pages_array))
+		pages = vmalloc(nr_pages * sizeof(unsigned long));
+	if (!pages)
+		return -ENOMEM;
+	rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+	for (idx = 0; idx < nr_pages && !rc; idx++) {
+		gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+		if (write)
+			rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+		else
+			rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+		len -= _len;
+		ga += _len;
+		data += _len;
+	}
+	if (nr_pages > ARRAY_SIZE(pages_array))
+		vfree(pages);
+	return rc;
+}
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, int write)
+{
+	unsigned long _len, gpa;
+	int rc = 0;
+
+	while (len && !rc) {
+		gpa = kvm_s390_real_to_abs(vcpu, gra);
+		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+		if (write)
+			rc = write_guest_abs(vcpu, gpa, data, _len);
+		else
+			rc = read_guest_abs(vcpu, gpa, data, _len);
+		len -= _len;
+		gra += _len;
+		data += _len;
+	}
+	return rc;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 917aeaa04fff..21ee62cd948e 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -227,4 +227,174 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 
 	return kvm_read_guest(vcpu->kvm, gpa, data, len);
 }
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+		 unsigned long len, int write);
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, int write);
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondary or home space (access register mode is currently not
+ * implemented).
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the data to be copied crosses page boundaries in guest address space.
+ * In addition also low address and DAT protection are inspected before
+ * copying any data (key protection is currently not implemented).
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_irq()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns:  - zero on success
+ *	     - a negative value if e.g. the guest mapping is broken or in
+ *	       case of out-of-memory. In this case the contents of pgm are
+ *	       undefined. Also parts of @data may have been copied to guest
+ *	       space.
+ *	     - a positive value if an access exception happened. In this case
+ *	       the returned value is the program interruption code and the
+ *	       contents of pgm may be used to inject an exception into the
+ *	       guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ *	 guest space (this is also true if the data to be copied would cross
+ *	 one or more page boundaries in guest space).
+ *	 Therefore this function may be used for nullifying and suppressing
+ *	 instruction emulation.
+ *	 It may also be used for terminating instructions, if it is undefined
+ *	 if data has been changed in guest space in case of an exception.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+		unsigned long len)
+{
+	return access_guest(vcpu, ga, data, len, 1);
+}
+
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to write_guest, except that
+ * data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+	       unsigned long len)
+{
+	return access_guest(vcpu, ga, data, len, 0);
+}
+
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		    unsigned long len)
+{
+	return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		   unsigned long len)
+{
+	return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		     unsigned long len)
+{
+	return access_guest_real(vcpu, gra, data, len, 1);
+}
+
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		    unsigned long len)
+{
+	return access_guest_real(vcpu, gra, data, len, 0);
+}
+
 #endif /* __KVM_S390_GACCESS_H */

From 217a440683b51463f53e397cfdda27d7e92bf275 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 30 Dec 2013 12:54:14 +0100
Subject: [PATCH 20/54] KVM: s390/sclp: correctly set eca siif bit

Check if the siif facility is available before setting the
corresponding bit in the vcpu's eca field.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/sclp.h   |  7 ++++++-
 arch/s390/kvm/kvm-s390.c       |  4 +++-
 drivers/s390/char/sclp_early.c | 22 +++++++++++++++++++++-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 2f5e9932b4de..943d43451116 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -28,7 +28,11 @@ struct sclp_ipl_info {
 
 struct sclp_cpu_entry {
 	u8 address;
-	u8 reserved0[13];
+	u8 reserved0[2];
+	u8 : 3;
+	u8 siif : 1;
+	u8 : 4;
+	u8 reserved2[10];
 	u8 type;
 	u8 reserved1;
 } __attribute__((packed));
@@ -61,5 +65,6 @@ int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 unsigned long sclp_get_hsa_size(void);
 void sclp_early_detect(void);
+int sclp_has_siif(void);
 
 #endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4b1df682e5c3..927ba7361da9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -625,7 +625,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->ecb |= 0x10;
 
 	vcpu->arch.sie_block->ecb2  = 8;
-	vcpu->arch.sie_block->eca   = 0xC1002001U;
+	vcpu->arch.sie_block->eca   = 0xC1002000U;
+	if (sclp_has_siif())
+		vcpu->arch.sie_block->eca |= 1;
 	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 14196ea0fdf3..b57fe0efb422 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -22,7 +22,8 @@ struct read_info_sccb {
 	u8	rnsize;			/* 10 */
 	u8	_reserved0[16 - 11];	/* 11-15 */
 	u16	ncpurl;			/* 16-17 */
-	u8	_reserved7[24 - 18];	/* 18-23 */
+	u16	cpuoff;			/* 18-19 */
+	u8	_reserved7[24 - 20];	/* 20-23 */
 	u8	loadparm[8];		/* 24-31 */
 	u8	_reserved1[48 - 32];	/* 32-47 */
 	u64	facilities;		/* 48-55 */
@@ -45,6 +46,7 @@ static unsigned int sclp_con_has_linemode __initdata;
 static unsigned long sclp_hsa_size;
 static unsigned int sclp_max_cpu;
 static struct sclp_ipl_info sclp_ipl_info;
+static unsigned char sclp_siif;
 
 u64 sclp_facilities;
 u8 sclp_fac84;
@@ -96,6 +98,9 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb)
 
 static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 {
+	struct sclp_cpu_entry *cpue;
+	u16 boot_cpu_address, cpu;
+
 	if (sclp_read_info_early(sccb))
 		return;
 
@@ -116,6 +121,15 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 		sclp_max_cpu = sccb->hcpua + 1;
 	}
 
+	boot_cpu_address = stap();
+	cpue = (void *)sccb + sccb->cpuoff;
+	for (cpu = 0; cpu < sccb->ncpurl; cpue++, cpu++) {
+		if (boot_cpu_address != cpue->address)
+			continue;
+		sclp_siif = cpue->siif;
+		break;
+	}
+
 	/* Save IPL information */
 	sclp_ipl_info.is_valid = 1;
 	if (sccb->flags & 0x2)
@@ -148,6 +162,12 @@ unsigned int sclp_get_max_cpu(void)
 	return sclp_max_cpu;
 }
 
+int sclp_has_siif(void)
+{
+	return sclp_siif;
+}
+EXPORT_SYMBOL(sclp_has_siif);
+
 /*
  * This function will be called after sclp_facilities_detect(), which gets
  * called from early.c code. The sclp_facilities_detect() function retrieves

From 8a242234b4bfed37f7fbd9b0b16f8088f31ca140 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 10 Jan 2014 14:33:28 +0100
Subject: [PATCH 21/54] KVM: s390: make use of ipte lock
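
Take the ipte lock in access_guest() while guest page tables are
walked, and make instructions which require the ipte interlock (now
handled by handle_ipte_interlock() in priv.c) wait until the lock is
free before they are retried. If the SIE siif facility is available
the lock state is kept in the ipte control word of the SCA, otherwise
a host side counter protected by a mutex is used.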

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  12 +++-
 arch/s390/kvm/gaccess.c          | 109 +++++++++++++++++++++++++++++++
 arch/s390/kvm/gaccess.h          |   2 +
 arch/s390/kvm/kvm-s390.c         |   2 +
 arch/s390/kvm/priv.c             |  18 +++++
 5 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c290d443d2c1..f1ed7bdba733 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -39,9 +39,17 @@ struct sca_entry {
 	__u64	reserved2[2];
 } __attribute__((packed));
 
+union ipte_control {
+	unsigned long val;
+	struct {
+		unsigned long k  : 1;
+		unsigned long kh : 31;
+		unsigned long kg : 32;
+	};
+};
 
 struct sca_block {
-	__u64	ipte_control;
+	union ipte_control ipte_control;
 	__u64	reserved[5];
 	__u64	mcn;
 	__u64	reserved2;
@@ -167,6 +175,7 @@ struct kvm_vcpu_stat {
 	u32 instruction_stpx;
 	u32 instruction_stap;
 	u32 instruction_storage_key;
+	u32 instruction_ipte_interlock;
 	u32 instruction_stsch;
 	u32 instruction_chsc;
 	u32 instruction_stsi;
@@ -336,6 +345,7 @@ struct kvm_arch{
 	int use_irqchip;
 	int use_cmma;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+	wait_queue_head_t ipte_wq;
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 916e1ee1f8c9..691fdb776c90 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -207,6 +207,107 @@ union raddress {
 	unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
+static int ipte_lock_count;
+static DEFINE_MUTEX(ipte_mutex);
+
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+	union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
+
+	if (vcpu->arch.sie_block->eca & 1)
+		return ic->kh != 0;
+	return ipte_lock_count != 0;
+}
+
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&ipte_mutex);
+	ipte_lock_count++;
+	if (ipte_lock_count > 1)
+		goto out;
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		old = ACCESS_ONCE(*ic);
+		while (old.k) {
+			cond_resched();
+			old = ACCESS_ONCE(*ic);
+		}
+		new = old;
+		new.k = 1;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+out:
+	mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&ipte_mutex);
+	ipte_lock_count--;
+	if (ipte_lock_count)
+		goto out;
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		new = old = ACCESS_ONCE(*ic);
+		new.k = 0;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	if (!ipte_lock_count)
+		wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+	mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		old = ACCESS_ONCE(*ic);
+		while (old.kg) {
+			cond_resched();
+			old = ACCESS_ONCE(*ic);
+		}
+		new = old;
+		new.k = 1;
+		new.kh++;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+}
+
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		new = old = ACCESS_ONCE(*ic);
+		new.kh--;
+		if (!new.kh)
+			new.k = 0;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	if (!new.kh)
+		wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+static void ipte_lock(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->eca & 1)
+		ipte_lock_siif(vcpu);
+	else
+		ipte_lock_simple(vcpu);
+}
+
+static void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->eca & 1)
+		ipte_unlock_siif(vcpu);
+	else
+		ipte_unlock_simple(vcpu);
+}
+
 static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
 {
 	switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
@@ -485,6 +586,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
 	unsigned long _len, nr_pages, gpa, idx;
 	unsigned long pages_array[2];
 	unsigned long *pages;
+	int need_ipte_lock;
+	union asce asce;
 	int rc;
 
 	if (!len)
@@ -498,6 +601,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
 		pages = vmalloc(nr_pages * sizeof(unsigned long));
 	if (!pages)
 		return -ENOMEM;
+	asce.val = get_vcpu_asce(vcpu);
+	need_ipte_lock = psw_bits(*psw).t && !asce.r;
+	if (need_ipte_lock)
+		ipte_lock(vcpu);
 	rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
 	for (idx = 0; idx < nr_pages && !rc; idx++) {
 		gpa = *(pages + idx) + (ga & ~PAGE_MASK);
@@ -510,6 +617,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
 		ga += _len;
 		data += _len;
 	}
+	if (need_ipte_lock)
+		ipte_unlock(vcpu);
 	if (nr_pages > ARRAY_SIZE(pages_array))
 		vfree(pages);
 	return rc;
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 21ee62cd948e..f46e764c5b43 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -397,4 +397,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 	return access_guest_real(vcpu, gra, data, len, 0);
 }
 
+int ipte_lock_held(struct kvm_vcpu *vcpu);
+
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 927ba7361da9..e1dfe2461d4b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -67,6 +67,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
@@ -437,6 +438,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	spin_lock_init(&kvm->arch.float_int.lock);
 	INIT_LIST_HEAD(&kvm->arch.float_int.list);
+	init_waitqueue_head(&kvm->arch.ipte_wq);
 
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9a04d74c5fb4..4792f1df921a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -173,6 +173,19 @@ static int handle_skey(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+	vcpu->stat.instruction_ipte_interlock++;
+	if (psw_bits(*psw).p)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+	psw->addr = __rewind_psw(*psw, 4);
+	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
+	return 0;
+}
+
 static int handle_test_block(struct kvm_vcpu *vcpu)
 {
 	unsigned long hva;
@@ -509,6 +522,7 @@ static const intercept_handler_t b2_handlers[256] = {
 	[0x10] = handle_set_prefix,
 	[0x11] = handle_store_prefix,
 	[0x12] = handle_store_cpu_address,
+	[0x21] = handle_ipte_interlock,
 	[0x29] = handle_skey,
 	[0x2a] = handle_skey,
 	[0x2b] = handle_skey,
@@ -526,6 +540,7 @@ static const intercept_handler_t b2_handlers[256] = {
 	[0x3a] = handle_io_inst,
 	[0x3b] = handle_io_inst,
 	[0x3c] = handle_io_inst,
+	[0x50] = handle_ipte_interlock,
 	[0x5f] = handle_io_inst,
 	[0x74] = handle_io_inst,
 	[0x76] = handle_io_inst,
@@ -686,7 +701,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 }
 
 static const intercept_handler_t b9_handlers[256] = {
+	[0x8a] = handle_ipte_interlock,
 	[0x8d] = handle_epsw,
+	[0x8e] = handle_ipte_interlock,
+	[0x8f] = handle_ipte_interlock,
 	[0xab] = handle_essa,
 	[0xaf] = handle_pfmf,
 };

From 7988276df75e8314e2dbbf75fb419f9c7a8e82f4 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 2 Jan 2014 10:59:41 +0100
Subject: [PATCH 22/54] KVM: s390: convert __do_deliver_interrupt()

Convert __do_deliver_interrupt() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/interrupt.c | 82 +++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 38 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a44c68990cf4..cb782e30ee4f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -213,13 +213,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_service_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params, 0);
-		rc  = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params,
-				(u32 __user *)__LC_EXT_PARAMS);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				   (u32 *)__LC_EXT_PARAMS);
 		break;
 	case KVM_S390_INT_PFAULT_INIT:
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
@@ -236,14 +237,15 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 	case KVM_S390_INT_PFAULT_DONE:
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
 						 inti->ext.ext_params2);
-		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params2,
-				(u64 __user *) __LC_EXT_PARAMS2);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				   (u64 *)__LC_EXT_PARAMS2);
 		break;
 	case KVM_S390_INT_VIRTIO:
 		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
@@ -252,16 +254,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params,
 						 inti->ext.ext_params2);
-		rc  = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params,
-				(u32 __user *)__LC_EXT_PARAMS);
-		rc |= put_guest(vcpu, inti->ext.ext_params2,
-				(u64 __user *)__LC_EXT_PARAMS2);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				   (u32 *)__LC_EXT_PARAMS);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				   (u64 *)__LC_EXT_PARAMS2);
 		break;
 	case KVM_S390_SIGP_STOP:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
@@ -317,11 +320,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 						 inti->mchk.mcic);
 		rc  = kvm_s390_vcpu_store_status(vcpu,
 						 KVM_S390_STORE_STATUS_PREFIXED);
-		rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE);
-		rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
+		rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
+		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_MCK_NEW_PSW, sizeof(psw_t));
 		break;
 
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -334,18 +338,20 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 param0, param1);
-		rc  = put_guest(vcpu, inti->io.subchannel_id,
-				(u16 __user *) __LC_SUBCHANNEL_ID);
-		rc |= put_guest(vcpu, inti->io.subchannel_nr,
-				(u16 __user *) __LC_SUBCHANNEL_NR);
-		rc |= put_guest(vcpu, inti->io.io_int_parm,
-				(u32 __user *) __LC_IO_INT_PARM);
-		rc |= put_guest(vcpu, inti->io.io_int_word,
-				(u32 __user *) __LC_IO_INT_WORD);
-		rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_IO_NEW_PSW, sizeof(psw_t));
+		rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+				   (u16 *)__LC_SUBCHANNEL_ID);
+		rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+				   (u16 *)__LC_SUBCHANNEL_NR);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+				   (u32 *)__LC_IO_INT_PARM);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+				   (u32 *)__LC_IO_INT_WORD);
+		rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
 		break;
 	}
 	default:

From 1a03b764229a5b368fd728fe25485b2510a4eeac Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Wed, 12 Feb 2014 14:05:38 +0100
Subject: [PATCH 23/54] KVM: s390: convert local irqs in 
 __do_deliver_interrupt()

Convert local irqs in __do_deliver_interrupt() to new guest
access functions.
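
The conversion follows one pattern throughout: the old __user pointer
accessors are replaced by their lowcore-aware counterparts. Roughly,
mirroring the hunks below:

	/* old: __user pointers into prefixed guest memory */
	rc  = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
	rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));

	/* new: dedicated lowcore access functions */
	rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));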

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/interrupt.c | 81 +++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index cb782e30ee4f..a1403ba75bfc 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -186,26 +186,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_emergency_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->emerg.code, 0);
-		rc  = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, inti->emerg.code,
-				(u16 __user *)__LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, inti->emerg.code,
+				   (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
 		break;
 	case KVM_S390_INT_EXTERNAL_CALL:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
 		vcpu->stat.deliver_external_call++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->extcall.code, 0);
-		rc  = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, inti->extcall.code,
-				(u16 __user *)__LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, inti->extcall.code,
+				   (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
 		break;
 	case KVM_S390_INT_SERVICE:
 		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
@@ -225,14 +227,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 	case KVM_S390_INT_PFAULT_INIT:
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
 						 inti->ext.ext_params2);
-		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params2,
-				(u64 __user *) __LC_EXT_PARAMS2);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				   (u64 *) __LC_EXT_PARAMS2);
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
@@ -288,12 +290,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_restart_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 0, 0);
-		rc  = copy_to_guest(vcpu,
-				    offsetof(struct _lowcore, restart_old_psw),
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      offsetof(struct _lowcore, restart_psw),
-				      sizeof(psw_t));
+		rc  = write_guest_lc(vcpu,
+				     offsetof(struct _lowcore, restart_old_psw),
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
 		atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 		break;
 	case KVM_S390_PROGRAM_INT:
@@ -303,13 +305,15 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_program_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->pgm.code, 0);
-		rc  = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE);
-		rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
-				(u16 __user *)__LC_PGM_ILC);
-		rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_PGM_NEW_PSW, sizeof(psw_t));
+		rc  = put_guest_lc(vcpu, inti->pgm.code,
+				   (u16 __user *)__LC_PGM_INT_CODE);
+		rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+				   (u16 __user *)__LC_PGM_ILC);
+		rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
 		break;
 
 	case KVM_S390_MCHK:
@@ -372,11 +376,12 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 		return 0;
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
 		return 0;
-	rc  = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-	rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			      __LC_EXT_NEW_PSW, sizeof(psw_t));
+	rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw,
+			    sizeof(psw_t));
 	if (rc) {
 		printk("kvm: The guest lowcore is not mapped during interrupt "
 			"delivery, killing userspace\n");

From 0f9701c6c2b39588971f4634dfcb10199941ce7a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:56:41 +0100
Subject: [PATCH 24/54] KVM: s390: convert handle_stfl()

Convert handle_stfl() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4792f1df921a..7066fc5bf48a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -318,10 +318,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
-			   vfacilities, 4);
+	rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+			    vfacilities, 4);
 	if (rc)
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		return rc;
 	VCPU_EVENT(vcpu, 5, "store facility list value %x",
 		   *(unsigned int *) vfacilities);
 	trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);

From 81480cc19c56efb573c63d89c46797d34fe58f27 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:36:07 +0100
Subject: [PATCH 25/54] KVM: s390: convert pfault code

Convert pfault code to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/diag.c     | 9 ++++-----
 arch/s390/kvm/kvm-s390.c | 5 +++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index ff768f1dd337..5521ace8b60d 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -64,12 +64,12 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	int rc;
 	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
 	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
-	unsigned long hva_token = KVM_HVA_ERR_BAD;
 
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
@@ -89,8 +89,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 		    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-		hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
-		if (kvm_is_error_hva(hva_token))
+		if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 		vcpu->arch.pfault_token = parm.token_addr;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e1dfe2461d4b..6cee19c7ad74 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1058,8 +1058,9 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.gmap->pfault_enabled)
 		return 0;
 
-	hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
-	if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+	hva += current->thread.gmap_addr & ~PAGE_MASK;
+	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
 		return 0;
 
 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);

From 0040e7d20fe467a4bcdb8a6c284631b80efffa8b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:37:47 +0100
Subject: [PATCH 26/54] KVM: s390: convert handle_prog()

Convert handle_prog() to new guest access functions.
Also make the code a bit more readable and check the return code of
write_guest_lc(), which was previously ignored.
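
The ITDB restore now bails out early and propagates a failing write,
roughly (matching the hunk below):

	rc = write_guest_lc(vcpu, TDB_ADDR, itdb, sizeof(*itdb));
	if (rc)
		return rc;
	memset(itdb, 0, sizeof(*itdb));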

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/intercept.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index eeb1ac7d8fa4..a8d8da84005b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -111,18 +111,22 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
 
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_itdb *itdb;
+	int rc;
+
 	vcpu->stat.exit_program_interruption++;
 
 	/* Restore ITDB to Program-Interruption TDB in guest memory */
-	if (IS_TE_ENABLED(vcpu) &&
-	    !(current->thread.per_flags & PER_FLAG_NO_TE) &&
-	    IS_ITDB_VALID(vcpu)) {
-		copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba,
-			      sizeof(struct kvm_s390_itdb));
-		memset((void *) vcpu->arch.sie_block->itdba, 0,
-		       sizeof(struct kvm_s390_itdb));
-	}
-
+	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+		goto skip_itdb;
+	if (current->thread.per_flags & PER_FLAG_NO_TE)
+		goto skip_itdb;
+	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+	rc = write_guest_lc(vcpu, TDB_ADDR, itdb, sizeof(*itdb));
+	if (rc)
+		return rc;
+	memset(itdb, 0, sizeof(*itdb));
+skip_itdb:
 	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
 	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
 }

From d0bce6054a1759f1b2c86bf553801c77dcaca745 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:45:58 +0100
Subject: [PATCH 27/54] KVM: s390: convert kvm_s390_store_status_unloaded()

Convert kvm_s390_store_status_unloaded() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 95 +++++++++++++---------------------------
 1 file changed, 31 insertions(+), 64 deletions(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6cee19c7ad74..ae7c1265fcc1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1249,83 +1249,50 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return rc;
 }
 
-static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
-		       unsigned long n, int prefix)
-{
-	if (prefix)
-		return copy_to_guest(vcpu, guestdest, from, n);
-	else
-		return copy_to_guest_absolute(vcpu, guestdest, from, n);
-}
-
 /*
  * store status at address
  * we have two special cases:
  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
  */
-int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
 	unsigned char archmode = 1;
-	int prefix;
 	u64 clkcomp;
+	int rc;
 
-	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
-		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+		if (write_guest_abs(vcpu, 163, &archmode, 1))
 			return -EFAULT;
-		addr = SAVE_AREA_BASE;
-		prefix = 0;
-	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
-		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+		gpa = SAVE_AREA_BASE;
+	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+		if (write_guest_real(vcpu, 163, &archmode, 1))
 			return -EFAULT;
-		addr = SAVE_AREA_BASE;
-		prefix = 1;
-	} else
-		prefix = 0;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
-			vcpu->arch.guest_fpregs.fprs, 128, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
-			vcpu->run->s.regs.gprs, 128, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
-			&vcpu->arch.sie_block->gpsw, 16, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
-			&vcpu->arch.sie_block->prefix, 4, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu,
-			addr + offsetof(struct save_area, fp_ctrl_reg),
-			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
-			&vcpu->arch.sie_block->todpr, 4, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
-			&vcpu->arch.sie_block->cputm, 8, prefix))
-		return -EFAULT;
-
+		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+	}
+	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
+			     vcpu->arch.guest_fpregs.fprs, 128);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+			      vcpu->run->s.regs.gprs, 128);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+			      &vcpu->arch.sie_block->gpsw, 16);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+			      &vcpu->arch.sie_block->prefix, 4);
+	rc |= write_guest_abs(vcpu,
+			      gpa + offsetof(struct save_area, fp_ctrl_reg),
+			      &vcpu->arch.guest_fpregs.fpc, 4);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+			      &vcpu->arch.sie_block->todpr, 4);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
+			      &vcpu->arch.sie_block->cputm, 8);
 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
-			&clkcomp, 8, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
-			&vcpu->run->s.regs.acrs, 64, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu,
-			addr + offsetof(struct save_area, ctrl_regs),
-			&vcpu->arch.sie_block->gcr, 128, prefix))
-		return -EFAULT;
-	return 0;
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+			      &clkcomp, 8);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+			      &vcpu->run->s.regs.acrs, 64);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+			      &vcpu->arch.sie_block->gcr, 128);
+	return rc ? -EFAULT : 0;
 }
 
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)

From 665170cb47acbddc202df0d8487ca867b64e1604 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:47:12 +0100
Subject: [PATCH 28/54] KVM: s390: convert
 __sigp_set_prefix()/handle_set_prefix()

Convert __sigp_set_prefix() and handle_set_prefix() to new guest
access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 20 ++++++++++++--------
 arch/s390/kvm/sigp.c | 12 +++++++-----
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 7066fc5bf48a..dd6ad8445608 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -65,8 +65,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 static int handle_set_prefix(struct kvm_vcpu *vcpu)
 {
 	u64 operand2;
-	u32 address = 0;
-	u8 tmp;
+	u32 address;
+	int rc;
 
 	vcpu->stat.instruction_spx++;
 
@@ -80,14 +80,18 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* get the value */
-	if (get_guest(vcpu, address, (u32 __user *) operand2))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = read_guest(vcpu, operand2, &address, sizeof(address));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	address = address & 0x7fffe000u;
+	address &= 0x7fffe000u;
 
-	/* make sure that the new value is valid memory */
-	if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
-	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
+	/*
+	 * Make sure the new value is valid memory. We only need to check the
+	 * first page, since address is 8k aligned and memory pieces are always
+	 * at least 1MB aligned and have at least a size of 1MB.
+	 */
+	if (kvm_is_error_gpa(vcpu->kvm, address))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	kvm_s390_set_prefix(vcpu, address);
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 26caeb530a78..c0b99e0f6b63 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -235,7 +235,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 	struct kvm_vcpu *dst_vcpu = NULL;
 	struct kvm_s390_interrupt_info *inti;
 	int rc;
-	u8 tmp;
 
 	if (cpu_addr < KVM_MAX_VCPUS)
 		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
@@ -243,10 +242,13 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 
-	/* make sure that the new value is valid memory */
-	address = address & 0x7fffe000u;
-	if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
-	   copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)) {
+	/*
+	 * Make sure the new value is valid memory. We only need to check the
+	 * first page, since address is 8k aligned and memory pieces are always
+	 * at least 1MB aligned and have at least a size of 1MB.
+	 */
+	address &= 0x7fffe000u;
+	if (kvm_is_error_gpa(vcpu->kvm, address)) {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INVALID_PARAMETER;
 		return SIGP_CC_STATUS_STORED;

From 0e7a3f9405d327bdc55ef1cdca7b63486b1916a0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:50:11 +0100
Subject: [PATCH 29/54] KVM: s390: convert handle_set_clock()

Convert handle_set_clock() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index dd6ad8445608..9648504d7818 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -35,8 +35,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu *cpup;
 	s64 hostclk, val;
+	int i, rc;
 	u64 op2;
-	int i;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -44,8 +44,9 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 	op2 = kvm_s390_get_base_disp_s(vcpu);
 	if (op2 & 7)	/* Operand must be on a doubleword boundary */
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	if (get_guest(vcpu, val, (u64 __user *) op2))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = read_guest(vcpu, op2, &val, sizeof(val));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 
 	if (store_tod_clock(&hostclk)) {
 		kvm_s390_set_psw_cc(vcpu, 3);

From f748f4a7ec549febb76e86082d9445beff1e9b01 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:52:47 +0100
Subject: [PATCH 30/54] KVM: s390: convert handle_store_prefix()

Convert handle_store_prefix() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9648504d7818..306caa27b962 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -106,6 +106,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 {
 	u64 operand2;
 	u32 address;
+	int rc;
 
 	vcpu->stat.instruction_stpx++;
 
@@ -122,8 +123,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	address = address & 0x7fffe000u;
 
 	/* store the value */
-	if (put_guest(vcpu, address, (u32 __user *)operand2))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = write_guest(vcpu, operand2, &address, sizeof(address));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 
 	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 0, address);

From 8b96de0e0359b731b1e5ec897f217f8bf7e5903f Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:53:27 +0100
Subject: [PATCH 31/54] KVM: s390: convert handle_store_cpu_address()

Convert handle_store_cpu_address() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 306caa27b962..36c34cf22889 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -134,23 +134,26 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 
 static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 {
-	u64 useraddr;
+	u16 vcpu_id = vcpu->vcpu_id;
+	u64 ga;
+	int rc;
 
 	vcpu->stat.instruction_stap++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	useraddr = kvm_s390_get_base_disp_s(vcpu);
+	ga = kvm_s390_get_base_disp_s(vcpu);
 
-	if (useraddr & 1)
+	if (ga & 1)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
-	trace_kvm_s390_handle_stap(vcpu, useraddr);
+	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+	trace_kvm_s390_handle_stap(vcpu, ga);
 	return 0;
 }
 

From ef23e7790e148041d159c1f710bee2b7b2f0d8cd Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:53:49 +0100
Subject: [PATCH 32/54] KVM: s390: convert handle_test_block()

Convert handle_test_block() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 36c34cf22889..5abfd531a8f1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -198,7 +198,6 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 
 static int handle_test_block(struct kvm_vcpu *vcpu)
 {
-	unsigned long hva;
 	gpa_t addr;
 	int reg2;
 
@@ -209,14 +208,13 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
 	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
 	addr = kvm_s390_real_to_abs(vcpu, addr);
 
-	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
-	if (kvm_is_error_hva(hva))
+	if (kvm_is_error_gpa(vcpu->kvm, addr))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	/*
 	 * We don't expect errors on modern systems, and do not care
 	 * about storage keys (yet), so let's just clear the page.
 	 */
-	if (clear_user((void __user *)hva, PAGE_SIZE) != 0)
+	if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
 		return -EFAULT;
 	kvm_s390_set_psw_cc(vcpu, 0);
 	vcpu->run->s.regs.gprs[0] = 0;

From 4799b557c9aa3a7b540121f2bece719f22229a21 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:55:48 +0100
Subject: [PATCH 33/54] KVM: s390: convert handle_tpi()

Convert handle_tpi() to new guest access functions.

The code now sets up a structure which is copied to guest space with a
single call instead of issuing several separate guest access calls.
This is necessary since the data to be copied may cross a page boundary.
If a protection exception happens while accessing any of the pages, the
instruction is suppressed and must not have modified any memory contents.
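
Schematically, the interruption code is staged in a local buffer and
transferred in one piece (as in the hunk below):

	u32 tpi_data[3];

	tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
	tpi_data[1] = inti->io.io_int_parm;
	tpi_data[2] = inti->io.io_int_word;
	/* two-word code: one call, even across a page boundary */
	rc = write_guest(vcpu, addr, &tpi_data, sizeof(tpi_data) - 4);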

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 5abfd531a8f1..4f6bc165d79b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -224,9 +224,12 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
 static int handle_tpi(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_interrupt_info *inti;
+	unsigned long len;
+	u32 tpi_data[3];
+	int cc, rc;
 	u64 addr;
-	int cc;
 
+	rc = 0;
 	addr = kvm_s390_get_base_disp_s(vcpu);
 	if (addr & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -235,30 +238,33 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 	if (!inti)
 		goto no_interrupt;
 	cc = 1;
+	tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+	tpi_data[1] = inti->io.io_int_parm;
+	tpi_data[2] = inti->io.io_int_word;
 	if (addr) {
 		/*
 		 * Store the two-word I/O interruption code into the
 		 * provided area.
 		 */
-		if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr)
-		    || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2))
-		    || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4)))
-			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		len = sizeof(tpi_data) - 4;
+		rc = write_guest(vcpu, addr, &tpi_data, len);
+		if (rc)
+			return kvm_s390_inject_prog_cond(vcpu, rc);
 	} else {
 		/*
 		 * Store the three-word I/O interruption code into
 		 * the appropriate lowcore area.
 		 */
-		put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID);
-		put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR);
-		put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM);
-		put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD);
+		len = sizeof(tpi_data);
+		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+			rc = -EFAULT;
 	}
 	kfree(inti);
 no_interrupt:
 	/* Set condition code and we're done. */
-	kvm_s390_set_psw_cc(vcpu, cc);
-	return 0;
+	if (!rc)
+		kvm_s390_set_psw_cc(vcpu, cc);
+	return rc ? -EFAULT : 0;
 }
 
 static int handle_tsch(struct kvm_vcpu *vcpu)

From 2f32d4ea280c35d5f3a43fe7724020cb4824fffa Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Wed, 8 Jan 2014 18:07:54 +0100
Subject: [PATCH 34/54] KVM: s390: reinject io interrupt on tpi failure

The tpi instruction should be suppressed on addressing and protection
exceptions, so we need to re-inject the dequeued io interrupt in that
case.
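
Schematically, the dequeued interrupt is now only freed if storing the
interruption code succeeded:

	if (!rc)
		kfree(inti);
	else
		kvm_s390_reinject_io_int(vcpu->kvm, inti);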

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/interrupt.c |  6 ++++++
 arch/s390/kvm/kvm-s390.h  |  2 ++
 arch/s390/kvm/priv.c      | 10 +++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a1403ba75bfc..1c74bb92329b 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -846,6 +846,12 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	return __inject_vm(kvm, inti);
 }
 
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+			      struct kvm_s390_interrupt_info *inti)
+{
+	__inject_vm(kvm, inti);
+}
+
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 			 struct kvm_s390_interrupt *s390int)
 {
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 11ed0a596b5a..dc506f3782ea 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -137,6 +137,8 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+			      struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in priv.c */
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4f6bc165d79b..f4451f09b9ed 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -259,7 +259,15 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
 			rc = -EFAULT;
 	}
-	kfree(inti);
+	/*
+	 * If we encounter a problem storing the interruption code, the
+	 * instruction is suppressed from the guest's view: reinject the
+	 * interrupt.
+	 */
+	if (!rc)
+		kfree(inti);
+	else
+		kvm_s390_reinject_io_int(vcpu->kvm, inti);
 no_interrupt:
 	/* Set condition code and we're done. */
 	if (!rc)

From 2d8bcaeda1576ddd970629a6afb9a188c83cd409 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:57:42 +0100
Subject: [PATCH 35/54] KVM: s390: convert handle_lpsw[e]()

Convert handle_lpsw[e]() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f4451f09b9ed..2de74543bd07 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -381,6 +381,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
 	psw_compat_t new_psw;
 	u64 addr;
+	int rc;
 
 	if (gpsw->mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -388,8 +389,10 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 	addr = kvm_s390_get_base_disp_s(vcpu);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (!(new_psw.mask & PSW32_MASK_BASE))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
@@ -405,6 +408,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 {
 	psw_t new_psw;
 	u64 addr;
+	int rc;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -412,8 +416,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 	addr = kvm_s390_get_base_disp_s(vcpu);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	vcpu->arch.sie_block->gpsw = new_psw;
 	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

From 7d777d78241d98bbd75886a8d09a3c793eafc807 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:58:16 +0100
Subject: [PATCH 36/54] KVM: s390: convert handle_stidp()

Convert handle_stidp() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2de74543bd07..bc969722a293 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -428,7 +428,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 
 static int handle_stidp(struct kvm_vcpu *vcpu)
 {
+	u64 stidp_data = vcpu->arch.stidp_data;
 	u64 operand2;
+	int rc;
 
 	vcpu->stat.instruction_stidp++;
 
@@ -440,8 +442,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
 	if (operand2 & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 
 	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
 	return 0;

From f987a3eef03092c895be5de7ac38ebf3558d5113 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:59:21 +0100
Subject: [PATCH 37/54] KVM: s390: convert handle lctl[g]()

Convert handle lctl[g]() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index bc969722a293..44ff22007052 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -755,32 +755,31 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 useraddr;
 	u32 val = 0;
 	int reg, rc;
+	u64 ga;
 
 	vcpu->stat.instruction_lctl++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	useraddr = kvm_s390_get_base_disp_rs(vcpu);
+	ga = kvm_s390_get_base_disp_rs(vcpu);
 
-	if (useraddr & 3)
+	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
-		   useraddr);
-	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
+	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
 	do {
-		rc = get_guest(vcpu, val, (u32 __user *) useraddr);
+		rc = read_guest(vcpu, ga, &val, sizeof(val));
 		if (rc)
-			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			return kvm_s390_inject_prog_cond(vcpu, rc);
 		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
 		vcpu->arch.sie_block->gcr[reg] |= val;
-		useraddr += 4;
+		ga += 4;
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
@@ -793,7 +792,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 useraddr;
+	u64 ga, val;
 	int reg, rc;
 
 	vcpu->stat.instruction_lctlg++;
@@ -801,23 +800,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	useraddr = kvm_s390_get_base_disp_rsy(vcpu);
+	ga = kvm_s390_get_base_disp_rsy(vcpu);
 
-	if (useraddr & 7)
+	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	reg = reg1;
 
-	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
-		   useraddr);
-	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
+	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
 	do {
-		rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
-			       (u64 __user *) useraddr);
+		rc = read_guest(vcpu, ga, &val, sizeof(val));
 		if (rc)
-			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		useraddr += 8;
+			return kvm_s390_inject_prog_cond(vcpu, rc);
+		vcpu->arch.sie_block->gcr[reg] = val;
+		ga += 8;
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;

From 645c5bc1d5b1253bf3df849edc339ec09b43371a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:58:59 +0100
Subject: [PATCH 38/54] KVM: s390: convert handle_stsi()

Convert handle_stsi() to new guest access functions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 44ff22007052..5fb503a6c443 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -530,9 +530,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 		break;
 	}
 
-	if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
-		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out_exception;
+	rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+	if (rc) {
+		rc = kvm_s390_inject_prog_cond(vcpu, rc);
+		goto out;
 	}
 	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
 	free_page(mem);
@@ -541,7 +542,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	return 0;
 out_no_data:
 	kvm_s390_set_psw_cc(vcpu, 3);
-out_exception:
+out:
 	free_page(mem);
 	return rc;
 }

From 3263bd163720807e68045370b70aaf38acde1598 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 2 Jan 2014 10:46:01 +0100
Subject: [PATCH 39/54] KVM: s390: remove old guest access functions

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/gaccess.h | 81 -----------------------------------------
 1 file changed, 81 deletions(-)

diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index f46e764c5b43..ed4749ecef8b 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -56,87 +56,6 @@ static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
 	return ga & ((1UL << 24) - 1);
 }
 
-static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
-					  void __user *gptr,
-					  int prefixing)
-{
-	unsigned long gaddr = (unsigned long) gptr;
-	unsigned long uaddr;
-
-	if (prefixing)
-		gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
-	uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
-	if (IS_ERR_VALUE(uaddr))
-		uaddr = -EFAULT;
-	return (void __user *)uaddr;
-}
-
-#define get_guest(vcpu, x, gptr)				\
-({								\
-	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
-	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
-	int __ret;						\
-								\
-	if (IS_ERR((void __force *)__uptr)) {			\
-		__ret = PTR_ERR((void __force *)__uptr);	\
-	} else {						\
-		BUG_ON((unsigned long)__uptr & __mask);		\
-		__ret = get_user(x, __uptr);			\
-	}							\
-	__ret;							\
-})
-
-#define put_guest(vcpu, x, gptr)				\
-({								\
-	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
-	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
-	int __ret;						\
-								\
-	if (IS_ERR((void __force *)__uptr)) {			\
-		__ret = PTR_ERR((void __force *)__uptr);	\
-	} else {						\
-		BUG_ON((unsigned long)__uptr & __mask);		\
-		__ret = put_user(x, __uptr);			\
-	}							\
-	__ret;							\
-})
-
-static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
-			       unsigned long from, unsigned long len,
-			       int to_guest, int prefixing)
-{
-	unsigned long _len, rc;
-	void __user *uptr;
-
-	while (len) {
-		uptr = to_guest ? (void __user *)to : (void __user *)from;
-		uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
-		if (IS_ERR((void __force *)uptr))
-			return -EFAULT;
-		_len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
-		_len = min(_len, len);
-		if (to_guest)
-			rc = copy_to_user((void __user *) uptr, (void *)from, _len);
-		else
-			rc = copy_from_user((void *)to, (void __user *)uptr, _len);
-		if (rc)
-			return -EFAULT;
-		len -= _len;
-		from += _len;
-		to += _len;
-	}
-	return 0;
-}
-
-#define copy_to_guest(vcpu, to, from, size) \
-	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 1)
-#define copy_from_guest(vcpu, to, from, size) \
-	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 1)
-#define copy_to_guest_absolute(vcpu, to, from, size) \
-	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
-#define copy_from_guest_absolute(vcpu, to, from, size) \
-	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
-
 /*
  * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
  * which shall only be used to access the lowcore of a vcpu.

From e497a96ae8eda8b693bebc8f464712cd788d641f Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:19:55 +0100
Subject: [PATCH 40/54] KVM: s390: cleanup kvm_s390_real_to_abs()

Add kerneldoc comment to kvm_s390_real_to_abs() and change the code
so it matches the coding style of the rest of gaccess.h.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/gaccess.h | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index ed4749ecef8b..1079c8fc6d0d 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -19,16 +19,24 @@
 #include <linux/ptrace.h>
 #include "kvm-s390.h"
 
-/* Convert real to absolute address by applying the prefix of the CPU */
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu: guest virtual cpu
+ * @gra: guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
 static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
-						 unsigned long gaddr)
+						 unsigned long gra)
 {
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	if (gaddr < 2 * PAGE_SIZE)
-		gaddr += prefix;
-	else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
-		gaddr -= prefix;
-	return gaddr;
+	unsigned long prefix = vcpu->arch.sie_block->prefix;
+
+	if (gra < 2 * PAGE_SIZE)
+		gra += prefix;
+	else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+		gra -= prefix;
+	return gra;
 }
 
 /**

From 3d53b46ce8b1b873cf8501bac251b8c0cf489d4f Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Mon, 10 Feb 2014 10:55:37 +0100
Subject: [PATCH 41/54] s390: fix name of lowcore field at offset 0xa3

According to the Principles of Operation, at offset 0xA3
in the lowcore we have the "Architectural-Mode identification",
not an "access identification".

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/lowcore.h | 4 ++--
 arch/s390/kernel/asm-offsets.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index bbf8141408cd..edfa89b67e78 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -62,7 +62,7 @@ struct _lowcore {
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	per_access_id;			/* 0x00a1 */
 	__u8	op_access_id;			/* 0x00a2 */
-	__u8	ar_access_id;			/* 0x00a3 */
+	__u8	ar_mode_id;			/* 0x00a3 */
 	__u8	pad_0x00a4[0x00b8-0x00a4];	/* 0x00a4 */
 	__u16	subchannel_id;			/* 0x00b8 */
 	__u16	subchannel_nr;			/* 0x00ba */
@@ -201,7 +201,7 @@ struct _lowcore {
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	per_access_id;			/* 0x00a1 */
 	__u8	op_access_id;			/* 0x00a2 */
-	__u8	ar_access_id;			/* 0x00a3 */
+	__u8	ar_mode_id;			/* 0x00a3 */
 	__u8	pad_0x00a4[0x00a8-0x00a4];	/* 0x00a4 */
 	__u64	trans_exc_code;			/* 0x00a8 */
 	__u64	monitor_code;			/* 0x00b0 */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index cc10cdd4d6a2..94c18d482ce7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -92,7 +92,7 @@ int main(void)
 	DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
 	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
 	DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
-	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
+	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
 	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
 	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
 	DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));

From 21ee7ffd176a238cf185c142bd4c20d0152eda4f Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Wed, 26 Feb 2014 16:32:46 +0100
Subject: [PATCH 42/54] s390: rename and split lowcore field per_perc_atmid

per_perc_atmid is currently a two-byte field that combines two
fields, the PER code and the PER Addressing-and-Translation-Mode
Identification (ATMID).

Let's make them accessible independently and also rename per_cause to
per_code.

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/lowcore.h | 6 ++++--
 arch/s390/kernel/asm-offsets.c  | 5 +++--
 arch/s390/kernel/entry.S        | 4 ++--
 arch/s390/kernel/entry64.S      | 4 ++--
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index edfa89b67e78..e88cb8c54130 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -56,7 +56,8 @@ struct _lowcore {
 	__u16	pgm_code;			/* 0x008e */
 	__u32	trans_exc_code;			/* 0x0090 */
 	__u16	mon_class_num;			/* 0x0094 */
-	__u16	per_perc_atmid;			/* 0x0096 */
+	__u8	per_code;			/* 0x0096 */
+	__u8	per_atmid;			/* 0x0097 */
 	__u32	per_address;			/* 0x0098 */
 	__u32	monitor_code;			/* 0x009c */
 	__u8	exc_access_id;			/* 0x00a0 */
@@ -196,7 +197,8 @@ struct _lowcore {
 	__u16	pgm_code;			/* 0x008e */
 	__u32	data_exc_code;			/* 0x0090 */
 	__u16	mon_class_num;			/* 0x0094 */
-	__u16	per_perc_atmid;			/* 0x0096 */
+	__u8	per_code;			/* 0x0096 */
+	__u8	per_atmid;			/* 0x0097 */
 	__u64	per_address;			/* 0x0098 */
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	per_access_id;			/* 0x00a1 */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 94c18d482ce7..31e4ba4aaf17 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -89,9 +89,10 @@ int main(void)
 	DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
 	DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
 	DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
-	DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
+	DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
+	DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
 	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
-	DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
+	DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
 	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
 	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
 	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 1662038516c0..e66f046b9c43 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -391,8 +391,8 @@ ENTRY(pgm_check_handler)
 	jz	pgm_kprobe
 	oi	__TI_flags+3(%r12),_TIF_PER_TRAP
 	mvc	__THREAD_per_address(4,%r1),__LC_PER_ADDRESS
-	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CAUSE
-	mvc	__THREAD_per_paid(1,%r1),__LC_PER_PAID
+	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CODE
+	mvc	__THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
 0:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
 	l	%r1,BASED(.Ljump_table)
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 5963e43618bb..3c34753de6ad 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -423,8 +423,8 @@ ENTRY(pgm_check_handler)
 	jz	pgm_kprobe
 	oi	__TI_flags+7(%r12),_TIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
-	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CAUSE
-	mvc	__THREAD_per_paid(1,%r14),__LC_PER_PAID
+	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
+	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
 0:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table

From da7cf2570c5fa29a02a3e8026bfff89620706d2f Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Wed, 26 Feb 2014 17:03:29 +0100
Subject: [PATCH 43/54] s390: add fields to lowcore definition

This patch adds fields which are currently missing but needed for the correct
injection of interrupts.

This is based on a patch by David Hildenbrand.

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kernel/asm-offsets.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 31e4ba4aaf17..859a7ed36c4b 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -89,17 +89,22 @@ int main(void)
 	DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
 	DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
 	DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
+	DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
 	DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
 	DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
 	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
+	DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
 	DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+	DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
 	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
+	DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
 	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
 	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
 	DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
 	DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
 	DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
 	DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
+	DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
 	DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
 	DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
 	DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
@@ -157,6 +162,8 @@ int main(void)
 #ifdef CONFIG_32BIT
 	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
 #else /* CONFIG_32BIT */
+	DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
+	DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
 	DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
 	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
 	DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));

From 439716a5cab30e930bd1ec0f8fd66cbbaa319250 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 3 Mar 2014 10:54:33 +0100
Subject: [PATCH 44/54] KVM: s390: extract irq parameters of intercepted
 program irqs

Whenever a program interrupt is intercepted, some parameters are stored in the
sie control block. These parameters have to be extracted in order to be
reinjected correctly. This patch also takes care of intercepted PER events,
which can occur in addition to any program interrupt.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 18 +++++++--
 arch/s390/kvm/intercept.c        | 65 +++++++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f1ed7bdba733..b8c808192893 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -120,9 +120,21 @@ struct kvm_s390_sie_block {
 	psw_t	gpsw;			/* 0x0090 */
 	__u64	gg14;			/* 0x00a0 */
 	__u64	gg15;			/* 0x00a8 */
-	__u8	reservedb0[30];		/* 0x00b0 */
-	__u16   iprcc;			/* 0x00ce */
-	__u8	reservedd0[48];		/* 0x00d0 */
+	__u8	reservedb0[28];		/* 0x00b0 */
+	__u16	pgmilc;			/* 0x00cc */
+	__u16	iprcc;			/* 0x00ce */
+	__u32	dxc;			/* 0x00d0 */
+	__u16	mcn;			/* 0x00d4 */
+	__u8	perc;			/* 0x00d6 */
+	__u8	peratmid;		/* 0x00d7 */
+	__u64	peraddr;		/* 0x00d8 */
+	__u8	eai;			/* 0x00e0 */
+	__u8	peraid;			/* 0x00e1 */
+	__u8	oai;			/* 0x00e2 */
+	__u8	armid;			/* 0x00e3 */
+	__u8	reservede4[4];		/* 0x00e4 */
+	__u64	tecmc;			/* 0x00e8 */
+	__u8	reservedf0[16];		/* 0x00f0 */
 	__u64	gcr[16];		/* 0x0100 */
 	__u64	gbea;			/* 0x0180 */
 	__u8	reserved188[24];	/* 0x0188 */
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a8d8da84005b..4c3311e41727 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -109,8 +109,69 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
 	return -EOPNOTSUPP;
 }
 
+static void __extract_prog_irq(struct kvm_vcpu *vcpu,
+			       struct kvm_s390_pgm_info *pgm_info)
+{
+	memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
+	pgm_info->code = vcpu->arch.sie_block->iprcc;
+
+	switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+	case PGM_SPACE_SWITCH:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+		pgm_info->op_access_id  = vcpu->arch.sie_block->oai;
+		break;
+	case PGM_MONITOR:
+		pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
+		pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_DATA:
+		pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+		break;
+	case PGM_PROTECTION:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+		break;
+	default:
+		break;
+	}
+
+	if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+		pgm_info->per_code = vcpu->arch.sie_block->perc;
+		pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
+		pgm_info->per_address = vcpu->arch.sie_block->peraddr;
+		pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+	}
+}
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_pgm_info pgm_info;
 	struct kvm_s390_itdb *itdb;
 	int rc;
 
@@ -128,7 +189,9 @@ static int handle_prog(struct kvm_vcpu *vcpu)
 	memset(itdb, 0, sizeof(*itdb));
 skip_itdb:
 	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
-	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+	__extract_prog_irq(vcpu, &pgm_info);
+
+	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
 }
 
 static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)

From 8712836b30cef5d49bc3bb8bc3da88a40e11e574 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 3 Mar 2014 10:55:13 +0100
Subject: [PATCH 45/54] KVM: s390: deliver program irq parameters and use
 correct ilc

Until now, no program interrupt parameters were stored in the low-core of
the target vcpu when a program interrupt was delivered.

This patch enables the delivery of those program interrupt parameters, takes
care of concurrent PER events which can be injected in addition to any
program interrupt, and uses the correct instruction length code (depending
on the interception code) for the injection of program interrupts.
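
On s390, the two leftmost bits of an instruction's first halfword encode its
length (00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6 bytes), which is why the
delivery code below can look up the ILC via "ipa >> 14". A minimal
restatement of that mapping:

	/* illustration only: instruction length from the first halfword */
	static unsigned short insn_length(unsigned short ipa)
	{
		static const unsigned short table[] = { 2, 4, 4, 6 };

		return table[ipa >> 14];	/* top two opcode bits */
	}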

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |   6 ++
 arch/s390/kvm/interrupt.c        | 110 ++++++++++++++++++++++++++++---
 2 files changed, 107 insertions(+), 9 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b8c808192893..5e5a14db8c21 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -102,6 +102,12 @@ struct kvm_s390_sie_block {
 #define ICTL_RRBE 0x00001000
 	__u32	ictl;			/* 0x0048 */
 	__u32	eca;			/* 0x004c */
+#define ICPT_INST	0x04
+#define ICPT_PROGI	0x08
+#define ICPT_INSTPROGI	0x0C
+#define ICPT_OPEREXC	0x2C
+#define ICPT_PARTEXEC	0x38
+#define ICPT_IOINST	0x40
 	__u8	icptcode;		/* 0x0050 */
 	__u8	reserved51;		/* 0x0051 */
 	__u16	ihcpu;			/* 0x0052 */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 1c74bb92329b..c49b4d4d310a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -174,6 +174,106 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
+			      struct kvm_s390_pgm_info *pgm_info)
+{
+	const unsigned short table[] = { 2, 4, 4, 6 };
+	int rc = 0;
+
+	switch (pgm_info->code & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+	case PGM_SPACE_SWITCH:
+		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+				  (u8 *)__LC_EXC_ACCESS_ID);
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+				   (u8 *)__LC_OP_ACCESS_ID);
+		break;
+	case PGM_MONITOR:
+		rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
+				  (u64 *)__LC_MON_CLASS_NR);
+		rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+				   (u64 *)__LC_MON_CODE);
+		break;
+	case PGM_DATA:
+		rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+				  (u32 *)__LC_DATA_EXC_CODE);
+		break;
+	case PGM_PROTECTION:
+		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		break;
+	}
+
+	if (pgm_info->code & PGM_PER) {
+		rc |= put_guest_lc(vcpu, pgm_info->per_code,
+				   (u8 *) __LC_PER_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+				   (u8 *)__LC_PER_ATMID);
+		rc |= put_guest_lc(vcpu, pgm_info->per_address,
+				   (u64 *) __LC_PER_ADDRESS);
+		rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+				   (u8 *) __LC_PER_ACCESS_ID);
+	}
+
+	switch (vcpu->arch.sie_block->icptcode) {
+	case ICPT_INST:
+	case ICPT_INSTPROGI:
+	case ICPT_OPEREXC:
+	case ICPT_PARTEXEC:
+	case ICPT_IOINST:
+		/* last instruction only stored for these icptcodes */
+		rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+				   (u16 *) __LC_PGM_ILC);
+		break;
+	case ICPT_PROGI:
+		rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
+				   (u16 *) __LC_PGM_ILC);
+		break;
+	default:
+		rc |= put_guest_lc(vcpu, 0,
+				   (u16 *) __LC_PGM_ILC);
+	}
+
+	rc |= put_guest_lc(vcpu, pgm_info->code,
+			   (u16 *)__LC_PGM_INT_CODE);
+	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+
+	return rc;
+}
+
 static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 				   struct kvm_s390_interrupt_info *inti)
 {
@@ -305,15 +405,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_program_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->pgm.code, 0);
-		rc  = put_guest_lc(vcpu, inti->pgm.code,
-				   (u16 __user *)__LC_PGM_INT_CODE);
-		rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
-				   (u16 __user *)__LC_PGM_ILC);
-		rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
+		rc = __deliver_prog_irq(vcpu, &inti->pgm);
 		break;
 
 	case KVM_S390_MCHK:

From aba0750889d012f84a719112997abb7be11bba4b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 23 Jan 2014 10:47:13 +0100
Subject: [PATCH 46/54] KVM: s390: emulate stctl and stctg

Introduce the methods to emulate the stctl and stctg instructions and add
tracing code for them.
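
Both handlers iterate from r1 to r3 with wrap-around at control register 15,
so e.g. stctl with r1=14 and r3=1 stores cr14, cr15, cr0 and cr1. A sketch
of that loop (store_cr() is a hypothetical helper):

	int reg = reg1;

	do {
		store_cr(reg);
		if (reg == reg3)
			break;
		reg = (reg + 1) % 16;	/* wrap from cr15 back to cr0 */
	} while (1);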

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  2 +
 arch/s390/kvm/intercept.c        |  1 +
 arch/s390/kvm/kvm-s390.c         |  2 +
 arch/s390/kvm/kvm-s390.h         |  1 +
 arch/s390/kvm/priv.c             | 73 ++++++++++++++++++++++++++++++++
 arch/s390/kvm/trace.h            | 25 +++++++++++
 6 files changed, 104 insertions(+)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 5e5a14db8c21..5d9648925a8e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -175,6 +175,8 @@ struct kvm_vcpu_stat {
 	u32 exit_instruction;
 	u32 instruction_lctl;
 	u32 instruction_lctlg;
+	u32 instruction_stctl;
+	u32 instruction_stctg;
 	u32 exit_program_interruption;
 	u32 exit_instr_and_program;
 	u32 deliver_external_call;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 4c3311e41727..c0e6b49191ba 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -29,6 +29,7 @@ static const intercept_handler_t instruction_handlers[256] = {
 	[0x83] = kvm_s390_handle_diag,
 	[0xae] = kvm_s390_handle_sigp,
 	[0xb2] = kvm_s390_handle_b2,
+	[0xb6] = kvm_s390_handle_stctl,
 	[0xb7] = kvm_s390_handle_lctl,
 	[0xb9] = kvm_s390_handle_b9,
 	[0xe5] = kvm_s390_handle_e5,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ae7c1265fcc1..7ae8c26065fb 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -52,6 +52,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
+	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index dc506f3782ea..5f00fc1e9785 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -147,6 +147,7 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 5fb503a6c443..27f9051a78f8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -789,6 +789,42 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u64 ga;
+	u32 val;
+	int reg, rc;
+
+	vcpu->stat.instruction_stctl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rs(vcpu);
+
+	if (ga & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
+
+	reg = reg1;
+	do {
+		val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
+		rc = write_guest(vcpu, ga, &val, sizeof(val));
+		if (rc)
+			return kvm_s390_inject_prog_cond(vcpu, rc);
+		ga += 4;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+
+	return 0;
+}
+
 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
@@ -825,8 +861,45 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int handle_stctg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u64 ga, val;
+	int reg, rc;
+
+	vcpu->stat.instruction_stctg++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rsy(vcpu);
+
+	if (ga & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	reg = reg1;
+
+	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
+
+	do {
+		val = vcpu->arch.sie_block->gcr[reg];
+		rc = write_guest(vcpu, ga, &val, sizeof(val));
+		if (rc)
+			return kvm_s390_inject_prog_cond(vcpu, rc);
+		ga += 8;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+
+	return 0;
+}
+
 static const intercept_handler_t eb_handlers[256] = {
 	[0x2f] = handle_lctlg,
+	[0x25] = handle_stctg,
 };
 
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index a4bf7d78a0db..abf6ba52769e 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -315,6 +315,31 @@ TRACE_EVENT(kvm_s390_handle_lctl,
 			   __entry->reg1, __entry->reg3, __entry->addr)
 	);
 
+TRACE_EVENT(kvm_s390_handle_stctl,
+	    TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, g)
+		    __field(int, reg1)
+		    __field(int, reg3)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->g = g;
+		    __entry->reg1 = reg1;
+		    __entry->reg3 = reg3;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
+			   __entry->g ? "stctg" : "stctl",
+			   __entry->reg1, __entry->reg3, __entry->addr)
+	);
+
 TRACE_EVENT(kvm_s390_handle_prefix,
 	    TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
 	    TP_ARGS(VCPU_ARGS_COMMON, set, address),

From af1827e773c983f1d601d674447aea89efdb1acb Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 23 Jan 2014 11:03:51 +0100
Subject: [PATCH 47/54] KVM: s390: kernel header addition for guest debugging

This patch adds to the kernel headers the structs needed to pass information
from/to userspace in order to debug a guest on s390 with hardware support.
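
A hypothetical userspace sketch of how these structs are meant to be used,
arming a single hardware breakpoint through the existing KVM_SET_GUEST_DEBUG
ioctl (the breakpoint address is made up):

	struct kvm_hw_breakpoint bp = {
		.addr = 0x10000,	/* example guest address */
		.type = KVM_HW_BP,
	};
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
	};

	dbg.arch.nr_hw_bp = 1;
	dbg.arch.hw_bp = &bp;
	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
		perror("KVM_SET_GUEST_DEBUG");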

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/uapi/asm/kvm.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index e35c79821d29..b8c0f07a0e08 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -79,11 +79,31 @@ struct kvm_fpu {
 	__u64 fprs[16];
 };
 
+#define KVM_GUESTDBG_USE_HW_BP		0x00010000
+
+#define KVM_HW_BP			1
+#define KVM_HW_WP_WRITE			2
+#define KVM_SINGLESTEP			4
+
 struct kvm_debug_exit_arch {
+	__u64 addr;
+	__u8 type;
+	__u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+	__u64 addr;
+	__u64 phys_addr;
+	__u64 len;
+	__u8 type;
+	__u8 pad[7]; /* Should be set to 0 */
 };
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+	__u32 nr_hw_bp;
+	__u32 pad; /* Should be set to 0 */
+	struct kvm_hw_breakpoint __user *hw_bp;
 };
 
 #define KVM_SYNC_PREFIX (1UL << 0)

From 27291e2165b6de70c476b7b675308113edd69a60 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 23 Jan 2014 12:26:52 +0100
Subject: [PATCH 48/54] KVM: s390: hardware support for guest debugging

This patch adds support for debugging the guest using the PER facility on
s390. Single-stepping, hardware breakpoints and hardware watchpoints are
supported. In order to use the guest's PER facility without the guest
noticing it, the control registers of the guest have to be patched and
access to them has to be intercepted (stctl, stctg, lctl, lctlg).

All PER program interrupts have to be intercepted, and only the PER
interrupts relevant for the guest have to be given back. Special care has to
be taken about repeated exits on the same hardware breakpoint. The host's
intervention in the guest's PER configuration is not fully transparent: PER
instruction nullification cannot be used by the guest, and too many storage
alteration events may be reported to the guest (if it is activated for
special address ranges only) while the host is concurrently debugging it.
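
The per-SIE-entry flow implemented by this patch can be summarized as
follows (simplified; sie_run() stands in for the actual SIE entry path):

	kvm_s390_backup_guest_per_regs(vcpu);	/* save guest cr0, cr9-cr11 */
	kvm_s390_patch_guest_per_regs(vcpu);	/* superset of host+guest events */
	exit_reason = sie_run(vcpu);
	kvm_s390_restore_guest_per_regs(vcpu);	/* guest sees its own values */
	if (per_event(vcpu))
		kvm_s390_handle_per_event(vcpu);	/* filter, maybe flag a debug exit */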

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  53 +++-
 arch/s390/include/uapi/asm/kvm.h |   1 +
 arch/s390/kvm/Makefile           |   2 +-
 arch/s390/kvm/guestdbg.c         | 479 +++++++++++++++++++++++++++++++
 arch/s390/kvm/intercept.c        |   9 +
 arch/s390/kvm/interrupt.c        |   8 +-
 arch/s390/kvm/kvm-s390.c         |  53 +++-
 arch/s390/kvm/kvm-s390.h         |  10 +
 8 files changed, 607 insertions(+), 8 deletions(-)
 create mode 100644 arch/s390/kvm/guestdbg.c

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 5d9648925a8e..0d45f6fe734f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -93,13 +93,18 @@ struct kvm_s390_sie_block {
 	__u8	reserved40[4];		/* 0x0040 */
 #define LCTL_CR0	0x8000
 #define LCTL_CR6	0x0200
+#define LCTL_CR9	0x0040
+#define LCTL_CR10	0x0020
+#define LCTL_CR11	0x0010
 #define LCTL_CR14	0x0002
 	__u16   lctl;			/* 0x0044 */
 	__s16	icpua;			/* 0x0046 */
-#define ICTL_LPSW 0x00400000
-#define ICTL_ISKE 0x00004000
-#define ICTL_SSKE 0x00002000
-#define ICTL_RRBE 0x00001000
+#define ICTL_PINT	0x20000000
+#define ICTL_LPSW	0x00400000
+#define ICTL_STCTL	0x00040000
+#define ICTL_ISKE	0x00004000
+#define ICTL_SSKE	0x00002000
+#define ICTL_RRBE	0x00001000
 	__u32	ictl;			/* 0x0048 */
 	__u32	eca;			/* 0x004c */
 #define ICPT_INST	0x04
@@ -306,6 +311,45 @@ struct kvm_s390_float_interrupt {
 	unsigned int irq_count;
 };
 
+struct kvm_hw_wp_info_arch {
+	unsigned long addr;
+	unsigned long phys_addr;
+	int len;
+	char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+	unsigned long addr;
+	int len;
+};
+
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which can be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+
+#define guestdbg_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+		(vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+struct kvm_guestdbg_info_arch {
+	unsigned long cr0;
+	unsigned long cr9;
+	unsigned long cr10;
+	unsigned long cr11;
+	struct kvm_hw_bp_info_arch *hw_bp_info;
+	struct kvm_hw_wp_info_arch *hw_wp_info;
+	int nr_hw_bp;
+	int nr_hw_wp;
+	unsigned long last_bp;
+};
 
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
@@ -321,6 +365,7 @@ struct kvm_vcpu_arch {
 		u64		stidp_data;
 	};
 	struct gmap *gmap;
+	struct kvm_guestdbg_info_arch guestdbg;
 #define KVM_S390_PFAULT_TOKEN_INVALID	(-1UL)
 	unsigned long pfault_token;
 	unsigned long pfault_select;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index b8c0f07a0e08..0fc26430a1e5 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 
 #define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
 
 /* Device control API: s390-specific devices */
 #define KVM_DEV_FLIC_GET_ALL_IRQS	1
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 83a7a355befe..b3b553469650 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqch
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o
+kvm-objs += diag.o gaccess.o guestdbg.o
 
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644
index 000000000000..100e99d1030d
--- /dev/null
+++ b/arch/s390/kvm/guestdbg.c
@@ -0,0 +1,479 @@
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop to include the address
+ * range starting with estart and the length len. Takes care of overflowing
+ * intervals and tries to minimize the overall interval size.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+	u64 estop;
+
+	if (len > 0)
+		len--;
+	else
+		len = 0;
+
+	estop = estart + len;
+
+	/* 0-0 range represents "not set" */
+	if ((*start == 0) && (*stop == 0)) {
+		*start = estart;
+		*stop = estop;
+	} else if (*start <= *stop) {
+		/* increase the existing range */
+		if (estart < *start)
+			*start = estart;
+		if (estop > *stop)
+			*stop = estop;
+	} else {
+		/* "overflowing" interval, whereby *stop > *start */
+		if (estart <= *stop) {
+			if (estop > *stop)
+				*stop = estop;
+		} else if (estop > *start) {
+			if (estart < *start)
+				*start = estart;
+		}
+		/* minimize the range */
+		else if ((estop - *stop) < (*start - estart))
+			*stop = estop;
+		else
+			*start = estart;
+	}
+}
+
+#define MAX_INST_SIZE 6
+
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+	    vcpu->arch.guestdbg.hw_bp_info == NULL)
+		return;
+
+	/*
+	 * If the guest is not interested in branching events, we can safely
+	 * limit them to the PER address range.
+	 */
+	if (!(*cr9 & PER_EVENT_BRANCH))
+		*cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+	*cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+		start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+		len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+		/*
+		 * The instruction in front of the desired bp has to
+		 * report instruction-fetching events
+		 */
+		if (start < MAX_INST_SIZE) {
+			len += start;
+			start = 0;
+		} else {
+			start -= MAX_INST_SIZE;
+			len += MAX_INST_SIZE;
+		}
+
+		extend_address_range(cr10, cr11, start, len);
+	}
+}
+
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+	    vcpu->arch.guestdbg.hw_wp_info == NULL)
+		return;
+
+	/* if host uses storage alteration for special address
+	 * spaces, enable all events and give all to the guest */
+	if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr10 = 0;
+		*cr11 = PSW_ADDR_INSN;
+	} else {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr9 |= PER_EVENT_STORE;
+
+		for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+			start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+			len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+			extend_address_range(cr10, cr11, start, len);
+		}
+	}
+}
+
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+	vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+	vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+	vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+	vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+	vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+	vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * TODO: only patch when the guest psw has PER enabled, otherwise use 0s!
+	 * This reduces the amount of reported events.
+	 * Need to intercept all psw changes!
+	 */
+
+	if (guestdbg_sstep_enabled(vcpu)) {
+		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+		vcpu->arch.sie_block->gcr[10] = 0;
+		vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+	}
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		enable_all_hw_bp(vcpu);
+		enable_all_hw_wp(vcpu);
+	}
+
+	/* TODO: Instruction-fetching-nullification not allowed for now */
+	if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+		vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+			    struct kvm_hw_breakpoint *bp_data,
+			    struct kvm_hw_wp_info_arch *wp_info)
+{
+	int ret = 0;
+	wp_info->len = bp_data->len;
+	wp_info->addr = bp_data->addr;
+	wp_info->phys_addr = bp_data->phys_addr;
+	wp_info->old_data = NULL;
+
+	if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+		return -EINVAL;
+
+	wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+	if (!wp_info->old_data)
+		return -ENOMEM;
+	/* try to back up the original value */
+	ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
+			 wp_info->len);
+	if (ret) {
+		kfree(wp_info->old_data);
+		wp_info->old_data = NULL;
+	}
+
+	return ret;
+}
+
+#define MAX_BP_COUNT 50
+
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg)
+{
+	int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
+	struct kvm_hw_breakpoint *bp_data = NULL;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+	if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+		return 0;
+	else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+		return -EINVAL;
+
+	size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
+	bp_data = kmalloc(size, GFP_KERNEL);
+	if (!bp_data) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = copy_from_user(bp_data, dbg->arch.hw_bp, size);
+	if (ret)
+		goto error;
+
+	for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			nr_bp++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
+	if (size > 0) {
+		wp_info = kmalloc(size, GFP_KERNEL);
+		if (!wp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+	size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
+	if (size > 0) {
+		bp_info = kmalloc(size, GFP_KERNEL);
+		if (!bp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+
+	for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			ret = __import_wp_info(vcpu, &bp_data[i],
+					       &wp_info[nr_wp]);
+			if (ret)
+				goto error;
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			bp_info[nr_bp].len = bp_data[i].len;
+			bp_info[nr_bp].addr = bp_data[i].addr;
+			nr_bp++;
+			break;
+		}
+	}
+
+	vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+	vcpu->arch.guestdbg.hw_bp_info = bp_info;
+	vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+	vcpu->arch.guestdbg.hw_wp_info = wp_info;
+	return 0;
+error:
+	kfree(bp_data);
+	kfree(wp_info);
+	kfree(bp_info);
+	return ret;
+}
+
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+		hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+		kfree(hw_wp_info->old_data);
+		hw_wp_info->old_data = NULL;
+	}
+	kfree(vcpu->arch.guestdbg.hw_wp_info);
+	vcpu->arch.guestdbg.hw_wp_info = NULL;
+
+	kfree(vcpu->arch.guestdbg.hw_bp_info);
+	vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+	vcpu->arch.guestdbg.nr_hw_wp = 0;
+	vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+	if (a <= b)
+		return (addr >= a) && (addr <= b);
+	else
+		/* "overflowing" interval */
+		return (addr <= a) && (addr >= b);
+}
+
+#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
+
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+					      unsigned long addr)
+{
+	struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+		return NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+		/* addr is directly the start or in the range of a bp */
+		if (addr == bp_info->addr)
+			goto found;
+		if (bp_info->len > 0 &&
+		    in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
+			goto found;
+
+		bp_info++;
+	}
+
+	return NULL;
+found:
+	return bp_info;
+}
+
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	void *temp = NULL;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+		return NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+		wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+		if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
+			continue;
+
+		temp = kmalloc(wp_info->len, GFP_KERNEL);
+		if (!temp)
+			continue;
+
+		/* refetch the wp data and compare it to the old value */
+		if (!read_guest(vcpu, wp_info->phys_addr, temp,
+				wp_info->len)) {
+			if (memcmp(temp, wp_info->old_data, wp_info->len)) {
+				kfree(temp);
+				return wp_info;
+			}
+		}
+		kfree(temp);
+		temp = NULL;
+	}
+
+	return NULL;
+}
+
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+	vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+	vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+}
+
+#define per_bp_event(code) \
+			(code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
+#define per_write_wp_event(code) \
+			(code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
+
+static int debug_exit_required(struct kvm_vcpu *vcpu)
+{
+	u32 perc = (vcpu->arch.sie_block->perc << 24);
+	struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+	unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+	unsigned long peraddr = vcpu->arch.sie_block->peraddr;
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		if (per_write_wp_event(perc) &&
+		    vcpu->arch.guestdbg.nr_hw_wp > 0) {
+			wp_info = any_wp_changed(vcpu);
+			if (wp_info) {
+				debug_exit->addr = wp_info->addr;
+				debug_exit->type = KVM_HW_WP_WRITE;
+				goto exit_required;
+			}
+		}
+		if (per_bp_event(perc) &&
+			 vcpu->arch.guestdbg.nr_hw_bp > 0) {
+			bp_info = find_hw_bp(vcpu, addr);
+			/* remove duplicate events if PC==PER address */
+			if (bp_info && (addr != peraddr)) {
+				debug_exit->addr = addr;
+				debug_exit->type = KVM_HW_BP;
+				vcpu->arch.guestdbg.last_bp = addr;
+				goto exit_required;
+			}
+			/* breakpoint missed */
+			bp_info = find_hw_bp(vcpu, peraddr);
+			if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+				debug_exit->addr = peraddr;
+				debug_exit->type = KVM_HW_BP;
+				goto exit_required;
+			}
+		}
+	}
+	if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+		debug_exit->addr = addr;
+		debug_exit->type = KVM_SINGLESTEP;
+		goto exit_required;
+	}
+
+	return 0;
+exit_required:
+	return 1;
+}
+
+#define guest_per_enabled(vcpu) \
+			     (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+
+static void filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+	u32 perc = vcpu->arch.sie_block->perc << 24;
+	u64 peraddr = vcpu->arch.sie_block->peraddr;
+	u64 addr = vcpu->arch.sie_block->gpsw.addr;
+	u64 cr9 = vcpu->arch.sie_block->gcr[9];
+	u64 cr10 = vcpu->arch.sie_block->gcr[10];
+	u64 cr11 = vcpu->arch.sie_block->gcr[11];
+	/* filter all events demanded by the guest */
+	u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
+
+	if (!guest_per_enabled(vcpu))
+		guest_perc = 0;
+
+	/* filter "successful-branching" events */
+	if (guest_perc & PER_EVENT_BRANCH &&
+	    cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+	    !in_addr_range(addr, cr10, cr11))
+		guest_perc &= ~PER_EVENT_BRANCH;
+
+	/* filter "instruction-fetching" events */
+	if (guest_perc & PER_EVENT_IFETCH &&
+	    !in_addr_range(peraddr, cr10, cr11))
+		guest_perc &= ~PER_EVENT_IFETCH;
+
+	/* All other PER events will be given to the guest */
+	/* TODO: Check altered address/address space */
+
+	vcpu->arch.sie_block->perc = guest_perc >> 24;
+
+	if (!guest_perc)
+		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+}
+
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+	if (debug_exit_required(vcpu))
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+	filter_guest_per_event(vcpu);
+}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index c0e6b49191ba..f61c800a2d2c 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -170,6 +170,8 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
 	}
 }
 
+#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_pgm_info pgm_info;
@@ -178,6 +180,13 @@ static int handle_prog(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.exit_program_interruption++;
 
+	if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+		kvm_s390_handle_per_event(vcpu);
+		/* the interrupt might have been filtered out completely */
+		if (vcpu->arch.sie_block->iprcc == 0)
+			return 0;
+	}
+
 	/* Restore ITDB to Program-Interruption TDB in guest memory */
 	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
 		goto skip_itdb;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c49b4d4d310a..f331014dd766 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -131,7 +131,13 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 		CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
 		&vcpu->arch.sie_block->cpuflags);
 	vcpu->arch.sie_block->lctl = 0x0000;
-	vcpu->arch.sie_block->ictl &= ~ICTL_LPSW;
+	vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+	if (guestdbg_enabled(vcpu)) {
+		vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+					       LCTL_CR10 | LCTL_CR11);
+		vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+	}
 }
 
 static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7ae8c26065fb..e6bbfe1a9474 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -934,10 +934,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }
 
+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+			      KVM_GUESTDBG_USE_HW_BP | \
+			      KVM_GUESTDBG_ENABLE)
+
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
-	return -EINVAL; /* not implemented yet */
+	int rc = 0;
+
+	vcpu->guest_debug = 0;
+	kvm_s390_clear_bp_data(vcpu);
+
+	if (vcpu->guest_debug & ~VALID_GUESTDBG_FLAGS)
+		return -EINVAL;
+
+	if (dbg->control & KVM_GUESTDBG_ENABLE) {
+		vcpu->guest_debug = dbg->control;
+		/* enforce guest PER */
+		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+
+		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+			rc = kvm_s390_import_bp_data(vcpu, dbg);
+	} else {
+		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		vcpu->arch.guestdbg.last_bp = 0;
+	}
+
+	if (rc) {
+		vcpu->guest_debug = 0;
+		kvm_s390_clear_bp_data(vcpu);
+		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+	}
+
+	return rc;
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -1095,6 +1125,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	if (rc)
 		return rc;
 
+	if (guestdbg_enabled(vcpu)) {
+		kvm_s390_backup_guest_per_regs(vcpu);
+		kvm_s390_patch_guest_per_regs(vcpu);
+	}
+
 	vcpu->arch.sie_block->icptcode = 0;
 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
@@ -1111,6 +1146,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 		   vcpu->arch.sie_block->icptcode);
 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
 
+	if (guestdbg_enabled(vcpu))
+		kvm_s390_restore_guest_per_regs(vcpu);
+
 	if (exit_reason >= 0) {
 		rc = 0;
 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
@@ -1176,7 +1214,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 		rc = vcpu_post_run(vcpu, exit_reason);
-	} while (!signal_pending(current) && !rc);
+	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	return rc;
@@ -1187,6 +1225,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int rc;
 	sigset_t sigsaved;
 
+	if (guestdbg_exit_pending(vcpu)) {
+		kvm_s390_prepare_debug_exit(vcpu);
+		return 0;
+	}
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -1199,6 +1242,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	case KVM_EXIT_S390_RESET:
 	case KVM_EXIT_S390_UCONTROL:
 	case KVM_EXIT_S390_TSCH:
+	case KVM_EXIT_DEBUG:
 		break;
 	default:
 		BUG();
@@ -1224,6 +1268,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		rc = -EINTR;
 	}
 
+	if (guestdbg_exit_pending(vcpu) && !rc)  {
+		kvm_s390_prepare_debug_exit(vcpu);
+		rc = 0;
+	}
+
 	if (rc == -EOPNOTSUPP) {
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 5f00fc1e9785..be8ae0d68ab6 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -211,4 +211,14 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
 void kvm_s390_destroy_adapters(struct kvm *kvm);
 
+/* implemented in guestdbg.c */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg);
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
+
 #endif

From ffad018cc06ea1e3ada0f613676f97d408817182 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 3 Feb 2014 17:18:02 +0100
Subject: [PATCH 49/54] KVM: s390: add documentation for diag 501

Added documentation for diag 501, stating that no subfunctions are provided and
no parameters are used.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/s390-diag.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virtual/kvm/s390-diag.txt
index f1de4fbade15..48c4921794ed 100644
--- a/Documentation/virtual/kvm/s390-diag.txt
+++ b/Documentation/virtual/kvm/s390-diag.txt
@@ -78,3 +78,5 @@ DIAGNOSE function code 'X'501 - KVM breakpoint
 
 If the function code specifies 0x501, breakpoint functions may be performed.
 This function code is handled by userspace.
+
+This diagnose function code has no subfunctions and uses no parameters.

From bb78c5ec9161561586f8d81d53258b251aab324a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Tue, 18 Mar 2014 10:03:26 +0100
Subject: [PATCH 50/54] KVM: s390: move timer interrupt checks into own
 functions

This patch moves the checks for enabled timer (clock-comparator) interrupts
and pending timer interrupts into their own functions, making the code more
readable and easier to maintain.

The previously empty stub kvm_cpu_has_pending_timer() is given a real
implementation.
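
The pending check compares the guest's clock comparator against the guest
view of the TOD clock (host TOD plus the guest epoch); restated in
isolation:

	/* a clock-comparator interrupt is pending once the guest TOD
	 * clock has passed the programmed comparator value */
	static int ckc_pending(struct kvm_vcpu *vcpu)
	{
		u64 guest_tod = get_tod_clock_fast() +
				vcpu->arch.sie_block->epoch;

		return vcpu->arch.sie_block->ckc < guest_tod;
	}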

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/interrupt.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f331014dd766..0165f1b089ac 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -56,6 +56,14 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	if (psw_extint_disabled(vcpu) ||
+	    !(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		return 0;
+	return 1;
+}
+
 static u64 int_word_to_isc_bits(u32 int_word)
 {
 	u8 isc = (int_word & 0x38000000) >> 27;
@@ -466,14 +474,10 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 	}
 }
 
-static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 {
 	int rc;
 
-	if (psw_extint_disabled(vcpu))
-		return 0;
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
-		return 0;
 	rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
 	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
 			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
@@ -485,7 +489,6 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 			"delivery, killing userspace\n");
 		do_exit(SIGKILL);
 	}
-	return 1;
 }
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
@@ -515,19 +518,20 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 		spin_unlock(&fi->lock);
 	}
 
-	if ((!rc) && (vcpu->arch.sie_block->ckc <
-		get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
-		if ((!psw_extint_disabled(vcpu)) &&
-			(vcpu->arch.sie_block->gcr[0] & 0x800ul))
-			rc = 1;
-	}
+	if (!rc && kvm_cpu_has_pending_timer(vcpu))
+		rc = 1;
 
 	return rc;
 }
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	return 0;
+	if (!(vcpu->arch.sie_block->ckc <
+	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+		return 0;
+	if (!ckc_interrupts_enabled(vcpu))
+		return 0;
+	return 1;
 }
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -550,8 +554,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP; /* disabled wait */
 	}
 
-	if (psw_extint_disabled(vcpu) ||
-	    (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+	if (!ckc_interrupts_enabled(vcpu)) {
 		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
 		goto no_timer;
 	}
@@ -663,9 +666,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 		} while (deliver);
 	}
 
-	if ((vcpu->arch.sie_block->ckc <
-		get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
-		__try_deliver_ckc_interrupt(vcpu);
+	if (kvm_cpu_has_pending_timer(vcpu))
+		deliver_ckc_interrupt(vcpu);
 
 	if (atomic_read(&fi->active)) {
 		do {

From f71d0dc5084b4de761b5be1aef1a855136cecd15 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Tue, 18 Mar 2014 10:06:14 +0100
Subject: [PATCH 51/54] KVM: s390: no timer interrupts when single-stepping a
 guest

When a guest is single-stepped, we want to disable timer interrupts. Otherwise,
the guest will continuously execute the external interrupt handler and make
debugging of code where timer interrupts are enabled almost impossible.

The delivery of timer interrupts can be enforced in such sections by setting a
breakpoint and continuing execution.

To achieve this, timer interrupts are disabled in the control register of
the guest just before SIE entry and are suppressed in the interrupt
check/delivery methods.
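
For reference, the bit in question is the clock-comparator subclass mask in
control register 0; clearing it in the shadowed register before SIE entry is
sufficient, because the original value is restored on exit:

	/* as done in kvm_s390_patch_guest_per_regs() below */
	vcpu->arch.sie_block->gcr[0] &= ~0x800ul;	/* mask ckc interrupts */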

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/guestdbg.c  | 2 ++
 arch/s390/kvm/interrupt.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 100e99d1030d..757ccef62fd5 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -155,6 +155,8 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
 	 */
 
 	if (guestdbg_sstep_enabled(vcpu)) {
+		/* disable timer (clock-comparator) interrupts */
+		vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
 		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
 		vcpu->arch.sie_block->gcr[10] = 0;
 		vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0165f1b089ac..d020c5f8eabb 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -61,6 +61,9 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 	if (psw_extint_disabled(vcpu) ||
 	    !(vcpu->arch.sie_block->gcr[0] & 0x800ul))
 		return 0;
+	if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
+		/* No timer interrupts when single stepping */
+		return 0;
 	return 1;
 }
 

From 67335e63c9ef59e97b45a08b4a6a93767762031d Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 25 Mar 2014 17:09:08 +0100
Subject: [PATCH 52/54] KVM: s390: Drop pending interrupts on guest exit

On hard exits (abort, sigkill) we may have some kvm_s390_interrupt_info
structures hanging around. Delete those on exit to avoid memory leaks.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
CC: stable@vger.kernel.org
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
---
 arch/s390/kvm/interrupt.c | 4 ++--
 arch/s390/kvm/kvm-s390.c  | 2 ++
 arch/s390/kvm/kvm-s390.h  | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d020c5f8eabb..077e4738ebdc 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1045,7 +1045,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static void clear_floating_interrupts(struct kvm *kvm)
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
 {
 	struct kvm_s390_float_interrupt *fi;
 	struct kvm_s390_interrupt_info	*n, *inti = NULL;
@@ -1391,7 +1391,7 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		break;
 	case KVM_DEV_FLIC_CLEAR_IRQS:
 		r = 0;
-		clear_floating_interrupts(dev->kvm);
+		kvm_s390_clear_float_irqs(dev->kvm);
 		break;
 	case KVM_DEV_FLIC_APF_ENABLE:
 		dev->kvm->arch.gmap->pfault_enabled = 1;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e6bbfe1a9474..b32c42cbc706 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -471,6 +471,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+	kvm_s390_clear_local_irqs(vcpu);
 	kvm_clear_async_pf_completion_queue(vcpu);
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		clear_bit(63 - vcpu->vcpu_id,
@@ -520,6 +521,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	if (!kvm_is_ucontrol(kvm))
 		gmap_free(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
+	kvm_s390_clear_float_irqs(kvm);
 }
 
 /* Section: vcpu related */
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index be8ae0d68ab6..c93ed5fabfbc 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -130,6 +130,7 @@ void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_float_irqs(struct kvm *kvm);
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 				    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,

From a86dcc2482cf29ffcf6fbc174ce55e4f5f18cd8b Mon Sep 17 00:00:00 2001
From: Michael Mueller <mimu@linux.vnet.ibm.com>
Date: Thu, 13 Mar 2014 19:29:09 +0100
Subject: [PATCH 53/54] KVM: s390: replace TDB_ADDR by __LC_PGM_TDB

The generically assembled lowcore labels already contain the address of the
TDB.

Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/intercept.c | 3 ++-
 arch/s390/kvm/kvm-s390.h  | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index f61c800a2d2c..f39160406efa 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 
 #include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
 
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -193,7 +194,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
 	if (current->thread.per_flags & PER_FLAG_NO_TE)
 		goto skip_itdb;
 	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
-	rc = write_guest_lc(vcpu, TDB_ADDR, itdb, sizeof(*itdb));
+	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
 	if (rc)
 		return rc;
 	memset(itdb, 0, sizeof(*itdb));
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c93ed5fabfbc..9b5680d1f6cc 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -28,7 +28,6 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
-#define TDB_ADDR		0x1800UL
 #define TDB_FORMAT1		1
 #define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
 

From e325fe69aa37b485635521568651642791d6d140 Mon Sep 17 00:00:00 2001
From: Michael Mueller <mimu@linux.vnet.ibm.com>
Date: Thu, 13 Mar 2014 12:16:45 +0100
Subject: [PATCH 54/54] KVM: s390: Factor out handle_itdb to handle TX aborts

Factor out a new function, handle_itdb(), which copies the ITDB into the
guest lowcore to fully handle a TX abort.

Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/intercept.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index f39160406efa..30e1c5eb726a 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -171,12 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
 	}
 }
 
+/*
+ * restore ITDB to program-interruption TDB in guest lowcore
+ * and set TX abort indication if required
+ */
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_itdb *itdb;
+	int rc;
+
+	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+		return 0;
+	if (current->thread.per_flags & PER_FLAG_NO_TE)
+		return 0;
+	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+	if (rc)
+		return rc;
+	memset(itdb, 0, sizeof(*itdb));
+
+	return 0;
+}
+
 #define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
 
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_pgm_info pgm_info;
-	struct kvm_s390_itdb *itdb;
 	int rc;
 
 	vcpu->stat.exit_program_interruption++;
@@ -188,20 +209,13 @@ static int handle_prog(struct kvm_vcpu *vcpu)
 			return 0;
 	}
 
-	/* Restore ITDB to Program-Interruption TDB in guest memory */
-	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
-		goto skip_itdb;
-	if (current->thread.per_flags & PER_FLAG_NO_TE)
-		goto skip_itdb;
-	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
-	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
+
+	rc = handle_itdb(vcpu);
 	if (rc)
 		return rc;
-	memset(itdb, 0, sizeof(*itdb));
-skip_itdb:
-	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
-	__extract_prog_irq(vcpu, &pgm_info);
 
+	__extract_prog_irq(vcpu, &pgm_info);
 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
 }