KVM: s390: Switch to new gmap

Switch KVM/s390 to use the new gmap code.

Remove includes of <gmap.h> and include "gmap.h" instead; fix all
existing users of the old gmap functions to use the new ones.

Fix guest storage key access functions to work with the new gmap.

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
This commit is contained in:
Claudio Imbrenda 2026-02-04 16:02:53 +01:00
parent d29a29a9e1
commit e38c884df9
21 changed files with 1129 additions and 1736 deletions

View File

@ -33,7 +33,7 @@ config GENERIC_LOCKBREAK
def_bool y if PREEMPTION
config PGSTE
def_bool y if KVM
def_bool n
config AUDIT_ARCH
def_bool y

View File

@ -442,7 +442,7 @@ struct kvm_vcpu_arch {
bool acrs_loaded;
struct kvm_s390_pv_vcpu pv;
union diag318_info diag318_info;
void *mc; /* Placeholder */
struct kvm_s390_mmu_cache *mc;
};
struct kvm_vm_stat {
@ -636,6 +636,8 @@ struct kvm_s390_pv {
struct mutex import_lock;
};
struct kvm_s390_mmu_cache;
struct kvm_arch {
struct esca_block *sca;
debug_info_t *dbf;
@ -675,6 +677,7 @@ struct kvm_arch {
struct kvm_s390_pv pv;
struct list_head kzdev_list;
spinlock_t kzdev_list_lock;
struct kvm_s390_mmu_cache *mc;
};
#define KVM_HVA_ERR_BAD (-1UL)

View File

@ -30,11 +30,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
#if IS_ENABLED(CONFIG_KVM)
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
mm->context.allow_cow_sharing = 1;
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
default:

View File

@ -36,7 +36,6 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
#include <asm/tlbflush.h>
#include <asm-generic/tlb.h>
#include <asm/gmap.h>
/*
* Release the page cache reference for a pte removed by
@ -85,8 +84,6 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_pmds = 1;
if (mm_has_pgste(tlb->mm))
gmap_unlink(tlb->mm, (unsigned long *)pte, address);
tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte));
}

View File

@ -471,65 +471,15 @@ do { \
#define arch_get_kernel_nofault __mvc_kernel_nofault
#define arch_put_kernel_nofault __mvc_kernel_nofault
void __cmpxchg_user_key_called_with_bad_pointer(void);
int __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
unsigned char old, unsigned char new, unsigned long key);
int __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
unsigned short old, unsigned short new, unsigned long key);
int __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
unsigned int old, unsigned int new, unsigned long key);
int __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
unsigned long old, unsigned long new, unsigned long key);
int __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
__uint128_t old, __uint128_t new, unsigned long key);
static __always_inline int _cmpxchg_user_key(unsigned long address, void *uval,
__uint128_t old, __uint128_t new,
unsigned long key, int size)
{
switch (size) {
case 1: return __cmpxchg_user_key1(address, uval, old, new, key);
case 2: return __cmpxchg_user_key2(address, uval, old, new, key);
case 4: return __cmpxchg_user_key4(address, uval, old, new, key);
case 8: return __cmpxchg_user_key8(address, uval, old, new, key);
case 16: return __cmpxchg_user_key16(address, uval, old, new, key);
default: __cmpxchg_user_key_called_with_bad_pointer();
}
return 0;
}
/**
* cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
* @ptr: User space address of value to compare to @old and exchange with
* @new. Must be aligned to sizeof(*@ptr).
* @uval: Address where the old value of *@ptr is written to.
* @old: Old value. Compared to the content pointed to by @ptr in order to
* determine if the exchange occurs. The old value read from *@ptr is
* written to *@uval.
* @new: New value to place at *@ptr.
* @key: Access key to use for checking storage key protection.
*
* Perform a cmpxchg on a user space target, honoring storage key protection.
* @key alone determines how key checking is performed, neither
* storage-protection-override nor fetch-protection-override apply.
* The caller must compare *@uval and @old to determine if values have been
* exchanged. In case of an exception *@uval is set to zero.
*
* Return: 0: cmpxchg executed
* -EFAULT: an exception happened when trying to access *@ptr
* -EAGAIN: maxed out number of retries (byte and short only)
*/
#define cmpxchg_user_key(ptr, uval, old, new, key) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(uval) __uval = (uval); \
\
BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
might_fault(); \
__chk_user_ptr(__ptr); \
_cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
(old), (new), (key), sizeof(*(__ptr))); \
})
int __cmpxchg_key1(void *address, unsigned char *uval, unsigned char old,
unsigned char new, unsigned long key);
int __cmpxchg_key2(void *address, unsigned short *uval, unsigned short old,
unsigned short new, unsigned long key);
int __cmpxchg_key4(void *address, unsigned int *uval, unsigned int old,
unsigned int new, unsigned long key);
int __cmpxchg_key8(void *address, unsigned long *uval, unsigned long old,
unsigned long new, unsigned long key);
int __cmpxchg_key16(void *address, __uint128_t *uval, __uint128_t old,
__uint128_t new, unsigned long key);
#endif /* __S390_UACCESS_H */

View File

@ -631,7 +631,6 @@ int uv_pin_shared(unsigned long paddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb);
int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio);
int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
int uv_convert_from_secure(unsigned long paddr);

View File

@ -209,39 +209,6 @@ int uv_convert_from_secure_pte(pte_t pte)
return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
}
/**
* should_export_before_import - Determine whether an export is needed
* before an import-like operation
* @uvcb: the Ultravisor control block of the UVC to be performed
* @mm: the mm of the process
*
* Returns whether an export is needed before every import-like operation.
* This is needed for shared pages, which don't trigger a secure storage
* exception when accessed from a different guest.
*
* Although considered as one, the Unpin Page UVC is not an actual import,
* so it is not affected.
*
* No export is needed also when there is only one protected VM, because the
* page cannot belong to the wrong VM in that case (there is no "other VM"
* it can belong to).
*
* Return: true if an export is needed before every import, otherwise false.
*/
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
/*
* The misc feature indicates, among other things, that importing a
* shared page from a different protected VM will automatically also
* transfer its ownership.
*/
if (uv_has_feature(BIT_UV_FEAT_MISC))
return false;
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
return false;
return atomic_read(&mm->context.protected_count) > 1;
}
/*
* Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
@ -313,20 +280,6 @@ int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
}
EXPORT_SYMBOL(__make_folio_secure);
static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
{
int rc;
if (!folio_trylock(folio))
return -EAGAIN;
if (should_export_before_import(uvcb, mm))
uv_convert_from_secure(folio_to_phys(folio));
rc = __make_folio_secure(folio, uvcb);
folio_unlock(folio);
return rc;
}
/**
* s390_wiggle_split_folio() - try to drain extra references to a folio and
* split the folio if it is large.
@ -414,56 +367,6 @@ int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
}
EXPORT_SYMBOL_GPL(s390_wiggle_split_folio);
int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
{
struct vm_area_struct *vma;
struct folio_walk fw;
struct folio *folio;
int rc;
mmap_read_lock(mm);
vma = vma_lookup(mm, hva);
if (!vma) {
mmap_read_unlock(mm);
return -EFAULT;
}
folio = folio_walk_start(&fw, vma, hva, 0);
if (!folio) {
mmap_read_unlock(mm);
return -ENXIO;
}
folio_get(folio);
/*
* Secure pages cannot be huge and userspace should not combine both.
* In case userspace does it anyway this will result in an -EFAULT for
* the unpack. The guest is thus never reaching secure mode.
* If userspace plays dirty tricks and decides to map huge pages at a
* later point in time, it will receive a segmentation fault or
* KVM_RUN will return -EFAULT.
*/
if (folio_test_hugetlb(folio))
rc = -EFAULT;
else if (folio_test_large(folio))
rc = -E2BIG;
else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
rc = -ENXIO;
else
rc = make_folio_secure(mm, folio, uvcb);
folio_walk_end(&fw, vma);
mmap_read_unlock(mm);
if (rc == -E2BIG || rc == -EBUSY) {
rc = s390_wiggle_split_folio(mm, folio);
if (!rc)
rc = -EAGAIN;
}
folio_put(folio);
return rc;
}
EXPORT_SYMBOL_GPL(make_hva_secure);
/*
* To be called with the folio locked or with an extra reference! This will
* prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
@ -474,21 +377,18 @@ int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
/* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
/*
* PG_arch_1 is used in 2 places:
* 1. for storage keys of hugetlb folios and KVM
* 2. As an indication that this small folio might be secure. This can
* overindicate, e.g. we set the bit before calling
* convert_to_secure.
* As secure pages are never large folios, both variants can co-exists.
* PG_arch_1 is used as an indication that this small folio might be
* secure. This can overindicate, e.g. we set the bit before calling
* convert_to_secure.
*/
if (!test_bit(PG_arch_1, &folio->flags.f))
return 0;
/* Large folios cannot be secure. */
if (WARN_ON_ONCE(folio_test_large(folio)))
return -EFAULT;
rc = uv_pin_shared(folio_to_phys(folio));
if (!rc) {
clear_bit(PG_arch_1, &folio->flags.f);

View File

@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
kvm-y += dat.o gmap.o faultin.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o

View File

@ -10,13 +10,13 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/gmap.h>
#include <asm/gmap_helpers.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
#include "gaccess.h"
#include "gmap.h"
static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
{

File diff suppressed because it is too large Load Diff

View File

@ -206,7 +206,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode);
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, union kvm_s390_quad *old_addr,
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, union kvm_s390_quad *old,
union kvm_s390_quad new, u8 access_key, bool *success);
/**
@ -450,11 +450,17 @@ void ipte_unlock(struct kvm *kvm);
int ipte_lock_held(struct kvm *kvm);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
/* MVPG PEI indication bits */
#define PEI_DAT_PROT 2
#define PEI_NOT_PTE 4
union mvpg_pei {
unsigned long val;
struct {
unsigned long addr : 61;
unsigned long not_pte : 1;
unsigned long dat_prot: 1;
unsigned long real : 1;
};
};
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
unsigned long saddr, unsigned long *datptr);
int gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
union mvpg_pei *datptr, bool wr);
#endif /* __KVM_S390_GACCESS_H */

View File

@ -1,141 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Guest memory management for KVM/s390 nested VMs.
*
* Copyright IBM Corp. 2008, 2020, 2024
*
* Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
* David Hildenbrand <david@redhat.com>
* Janosch Frank <frankja@linux.vnet.ibm.com>
*/
#include <linux/compiler.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/pgtable.h>
#include <linux/pagemap.h>
#include <linux/mman.h>
#include <asm/lowcore.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
/**
* gmap_find_shadow - find a specific asce in the list of shadow tables
* @parent: pointer to the parent gmap
* @asce: ASCE for which the shadow table is created
* @edat_level: edat level to be used for the shadow translation
*
* Returns the pointer to a gmap if a shadow table with the given asce is
* already available, ERR_PTR(-EAGAIN) if another one is just being created,
* otherwise NULL
*
* Context: Called with parent->shadow_lock held
*/
static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
{
struct gmap *sg;
/* Walking parent->children is only safe while holding the shadow_lock. */
lockdep_assert_held(&parent->shadow_lock);
list_for_each_entry(sg, &parent->children, list) {
/* Skip shadows created for a different asce/edat-level combination. */
if (!gmap_shadow_valid(sg, asce, edat_level))
continue;
/* A matching shadow exists but another CPU is still setting it up. */
if (!sg->initialized)
return ERR_PTR(-EAGAIN);
/* Hand out a reference to the caller; caller is responsible for dropping it. */
refcount_inc(&sg->ref_count);
return sg;
}
return NULL;
}
/**
* gmap_shadow - create/find a shadow guest address space
* @parent: pointer to the parent gmap
* @asce: ASCE for which the shadow table is created
* @edat_level: edat level to be used for the shadow translation
*
* The pages of the top level page table referred by the asce parameter
* will be set to read-only and marked in the PGSTEs of the kvm process.
* The shadow table will be removed automatically on any change to the
* PTE mapping for the source table.
*
* Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
* ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
* parent gmap table could not be protected.
*/
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
{
struct gmap *sg, *new;
unsigned long limit;
int rc;
/*
 * Shadow gmaps are incompatible with 1M huge pages, and a shadow
 * cannot itself be shadowed; either condition is a host bug.
 */
if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
return ERR_PTR(-EFAULT);
/* Fast path: reuse an existing shadow for this asce/edat level if any. */
spin_lock(&parent->shadow_lock);
sg = gmap_find_shadow(parent, asce, edat_level);
spin_unlock(&parent->shadow_lock);
if (sg)
return sg;
/* Create a new shadow gmap */
/* Derive the address-space limit from the ASCE designation type. */
limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
if (asce & _ASCE_REAL_SPACE)
limit = -1UL;
new = gmap_alloc(limit);
if (!new)
return ERR_PTR(-ENOMEM);
new->mm = parent->mm;
/* The shadow holds a reference on its parent for its whole lifetime. */
new->parent = gmap_get(parent);
new->private = parent->private;
new->orig_asce = asce;
new->edat_level = edat_level;
/* Not visible as usable until protection of the parent table is done. */
new->initialized = false;
spin_lock(&parent->shadow_lock);
/* Recheck if another CPU created the same shadow */
sg = gmap_find_shadow(parent, asce, edat_level);
if (sg) {
/* Lost the race: free our copy and return the winner's shadow. */
spin_unlock(&parent->shadow_lock);
gmap_free(new);
return sg;
}
if (asce & _ASCE_REAL_SPACE) {
/* only allow one real-space gmap shadow */
list_for_each_entry(sg, &parent->children, list) {
if (sg->orig_asce & _ASCE_REAL_SPACE) {
/* Tear down the previous real-space shadow before adding ours. */
spin_lock(&sg->guest_table_lock);
gmap_unshadow(sg);
spin_unlock(&sg->guest_table_lock);
list_del(&sg->list);
gmap_put(sg);
break;
}
}
}
/* One reference for the children list, one for the caller. */
refcount_set(&new->ref_count, 2);
list_add(&new->list, &parent->children);
if (asce & _ASCE_REAL_SPACE) {
/* nothing to protect, return right away */
new->initialized = true;
spin_unlock(&parent->shadow_lock);
return new;
}
spin_unlock(&parent->shadow_lock);
/* protect after insertion, so it will get properly invalidated */
mmap_read_lock(parent->mm);
rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
((asce & _ASCE_TABLE_LENGTH) + 1),
PROT_READ, GMAP_NOTIFY_SHADOW);
mmap_read_unlock(parent->mm);
spin_lock(&parent->shadow_lock);
new->initialized = true;
if (rc) {
/* Protection failed: unlink and free the half-built shadow. */
list_del(&new->list);
gmap_free(new);
new = ERR_PTR(rc);
}
spin_unlock(&parent->shadow_lock);
return new;
}

View File

@ -21,6 +21,7 @@
#include "gaccess.h"
#include "trace.h"
#include "trace-s390.h"
#include "faultin.h"
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
@ -367,8 +368,11 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
if (rc != 0)
do {
rc = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(srcaddr), false);
} while (rc == -EAGAIN);
if (rc)
return rc;
/* Ensure that the source is paged-in, no actual access -> no key checking */
@ -376,8 +380,11 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
if (rc != 0)
do {
rc = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(dstaddr), true);
} while (rc == -EAGAIN);
if (rc)
return rc;
kvm_s390_retry_instr(vcpu);

View File

@ -26,7 +26,6 @@
#include <linux/uaccess.h>
#include <asm/sclp.h>
#include <asm/isc.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/airq.h>
#include <asm/tpi.h>
@ -34,6 +33,7 @@
#include "gaccess.h"
#include "trace-s390.h"
#include "pci.h"
#include "gmap.h"
#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
@ -2632,12 +2632,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_FLIC_APF_ENABLE:
if (kvm_is_ucontrol(dev->kvm))
return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 1;
set_bit(GMAP_FLAG_PFAULT_ENABLED, &dev->kvm->arch.gmap->flags);
break;
case KVM_DEV_FLIC_APF_DISABLE_WAIT:
if (kvm_is_ucontrol(dev->kvm))
return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 0;
clear_bit(GMAP_FLAG_PFAULT_ENABLED, &dev->kvm->arch.gmap->flags);
/*
* Make sure no async faults are in transition when
* clearing the queues. So we don't need to worry

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,8 @@
#include <asm/facility.h>
#include <asm/processor.h>
#include <asm/sclp.h>
#include "dat.h"
#include "gmap.h"
#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
@ -114,9 +116,7 @@ static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
static inline int kvm_is_ucontrol(struct kvm *kvm)
{
#ifdef CONFIG_KVM_S390_UCONTROL
if (kvm->arch.gmap)
return 0;
return 1;
return test_bit(GMAP_FLAG_IS_UCONTROL, &kvm->arch.gmap->flags);
#else
return 0;
#endif
@ -440,14 +440,9 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu);
/* implemented in vsie.c */
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu);
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, gpa_t start, gpa_t end);
void kvm_s390_vsie_init(struct kvm *kvm);
void kvm_s390_vsie_destroy(struct kvm *kvm);
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
/* implemented in gmap-vsie.c */
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@ -469,15 +464,9 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
unsigned long bits);
static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
{
return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
}
bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu);
/* implemented in diag.c */

View File

@ -21,13 +21,14 @@
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
#include <asm/page-states.h>
#include <asm/gmap.h>
#include <asm/ptrace.h>
#include <asm/sclp.h>
#include <asm/ap.h>
#include <asm/gmap_helpers.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"
#include "gmap.h"
static int handle_ri(struct kvm_vcpu *vcpu)
{
@ -222,7 +223,7 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
if (vcpu->arch.skey_enabled)
return 0;
rc = s390_enable_skey();
rc = gmap_enable_skeys(vcpu->arch.gmap);
VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
if (rc)
return rc;
@ -255,10 +256,9 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
static int handle_iske(struct kvm_vcpu *vcpu)
{
unsigned long gaddr, vmaddr;
unsigned char key;
unsigned long gaddr;
int reg1, reg2;
bool unlocked;
union skey key;
int rc;
vcpu->stat.instruction_iske++;
@ -275,37 +275,21 @@ static int handle_iske(struct kvm_vcpu *vcpu)
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
mmap_read_lock(current->mm);
rc = get_guest_storage_key(current->mm, vmaddr, &key);
if (rc) {
rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
mmap_read_unlock(current->mm);
goto retry;
}
}
mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
rc = dat_get_storage_key(vcpu->arch.gmap->asce, gpa_to_gfn(gaddr), &key);
if (rc > 0)
return kvm_s390_inject_program_int(vcpu, rc);
if (rc < 0)
return rc;
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
vcpu->run->s.regs.gprs[reg1] |= key;
vcpu->run->s.regs.gprs[reg1] |= key.skey;
return 0;
}
static int handle_rrbe(struct kvm_vcpu *vcpu)
{
unsigned long vmaddr, gaddr;
unsigned long gaddr;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_rrbe++;
@ -322,24 +306,10 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
mmap_read_lock(current->mm);
rc = reset_guest_reference_bit(current->mm, vmaddr);
if (rc < 0) {
rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
mmap_read_unlock(current->mm);
goto retry;
}
}
mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
rc = dat_reset_reference_bit(vcpu->arch.gmap->asce, gpa_to_gfn(gaddr));
if (rc > 0)
return kvm_s390_inject_program_int(vcpu, rc);
if (rc < 0)
return rc;
kvm_s390_set_psw_cc(vcpu, rc);
@ -354,9 +324,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
{
unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
unsigned long start, end;
unsigned char key, oldkey;
union skey key, oldkey;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_sske++;
@ -377,7 +346,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
key.skey = vcpu->run->s.regs.gprs[reg1] & 0xfe;
start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
start = kvm_s390_logical_to_effective(vcpu, start);
if (m3 & SSKE_MB) {
@ -389,27 +358,17 @@ static int handle_sske(struct kvm_vcpu *vcpu)
}
while (start != end) {
unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
unlocked = false;
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
mmap_read_lock(current->mm);
rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
m3 & SSKE_NQ, m3 & SSKE_MR,
m3 & SSKE_MC);
if (rc < 0) {
rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
rc = dat_cond_set_storage_key(vcpu->arch.mc, vcpu->arch.gmap->asce,
gpa_to_gfn(start), key, &oldkey,
m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC);
}
mmap_read_unlock(current->mm);
if (rc == -EFAULT)
if (rc > 1)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc == -EAGAIN)
if (rc == -ENOMEM) {
kvm_s390_mmu_cache_topup(vcpu->arch.mc);
continue;
}
if (rc < 0)
return rc;
start += PAGE_SIZE;
@ -422,7 +381,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
} else {
kvm_s390_set_psw_cc(vcpu, rc);
vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
vcpu->run->s.regs.gprs[reg1] |= (u64)oldkey.skey << 8;
}
}
if (m3 & SSKE_MB) {
@ -1082,7 +1041,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
bool mr = false, mc = false, nq;
int reg1, reg2;
unsigned long start, end;
unsigned char key;
union skey key;
vcpu->stat.instruction_pfmf++;
@ -1110,7 +1069,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
key.skey = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
start = kvm_s390_logical_to_effective(vcpu, start);
@ -1141,14 +1100,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
while (start != end) {
unsigned long vmaddr;
bool unlocked = false;
/* Translate guest address to host address */
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
@ -1159,19 +1110,17 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc)
return rc;
mmap_read_lock(current->mm);
rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
if (rc < 0) {
rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
rc = dat_cond_set_storage_key(vcpu->arch.mc, vcpu->arch.gmap->asce,
gpa_to_gfn(start), key,
NULL, nq, mr, mc);
}
mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc == -EAGAIN)
if (rc > 1)
return kvm_s390_inject_program_int(vcpu, rc);
if (rc == -ENOMEM) {
kvm_s390_mmu_cache_topup(vcpu->arch.mc);
continue;
}
if (rc < 0)
return rc;
}
@ -1195,8 +1144,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
{
int r1, r2, nappended, entries;
unsigned long gfn, hva, res, pgstev, ptev;
union essa_state state;
unsigned long *cbrlo;
unsigned long gfn;
bool dirtied;
/*
* We don't need to set SD.FPF.SK to 1 here, because if we have a
@ -1205,33 +1156,12 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
kvm_s390_get_regs_rre(vcpu, &r1, &r2);
gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
hva = gfn_to_hva(vcpu->kvm, gfn);
entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
if (kvm_is_error_hva(hva))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
if (nappended < 0) {
res = orc ? 0x10 : 0;
vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
nappended = dat_perform_essa(vcpu->arch.gmap->asce, gfn, orc, &state, &dirtied);
vcpu->run->s.regs.gprs[r1] = state.val;
if (nappended < 0)
return 0;
}
res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
/*
* Set the block-content state part of the result. 0 means resident, so
* nothing to do if the page is valid. 2 is for preserved pages
* (non-present and non-zero), and 3 for zero pages (non-present and
* zero).
*/
if (ptev & _PAGE_INVALID) {
res |= 2;
if (pgstev & _PGSTE_GPS_ZERO)
res |= 1;
}
if (pgstev & _PGSTE_GPS_NODAT)
res |= 0x20;
vcpu->run->s.regs.gprs[r1] = res;
/*
* It is possible that all the normal 511 slots were full, in which case
* we will now write in the 512th slot, which is reserved for host use.
@ -1243,17 +1173,34 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
cbrlo[entries] = gfn << PAGE_SHIFT;
}
if (orc) {
struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
/* Increment only if we are really flipping the bit */
if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
}
if (dirtied)
atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
return nappended;
}
/*
 * Zap the guest pages named in the ESSA coalescing buffer (cbrl) whose
 * PGSTE usage state says their content is no longer needed (UNUSED or
 * logically zero). Must be called with the kvm mmu_lock held for read.
 */
static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len)
{
union crste *crstep;
union pgste pgste;
union pte *ptep;
int i;
lockdep_assert_held(&vcpu->kvm->mmu_lock);
for (i = 0; i < len; i++) {
/* Walk down to the page-table level; skip entries with no mapping. */
if (dat_entry_walk(NULL, gpa_to_gfn(cbrl[i]), vcpu->arch.gmap->asce,
0, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep))
continue;
/* Skip if there is no pte, or the pte is protected (s.pr set). */
if (!ptep || ptep->s.pr)
continue;
pgste = pgste_get_lock(ptep);
/* Only discard pages whose CMM state marks them as disposable. */
if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero)
gmap_helper_zap_one_page(vcpu->kvm->mm, cbrl[i]);
pgste_set_unlock(ptep, pgste);
}
}
static int handle_essa(struct kvm_vcpu *vcpu)
{
lockdep_assert_held(&vcpu->kvm->srcu);
@ -1261,11 +1208,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
unsigned long *cbrlo;
struct gmap *gmap;
int i, orc;
VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
gmap = vcpu->arch.gmap;
vcpu->stat.instruction_essa++;
if (!vcpu->kvm->arch.use_cmma)
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
@ -1289,11 +1234,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
* value really needs to be written to; if the value is
* already correct, we do nothing and avoid the lock.
*/
if (vcpu->kvm->mm->context.uses_cmm == 0) {
mmap_write_lock(vcpu->kvm->mm);
vcpu->kvm->mm->context.uses_cmm = 1;
mmap_write_unlock(vcpu->kvm->mm);
}
set_bit(GMAP_FLAG_USES_CMM, &vcpu->arch.gmap->flags);
/*
* If we are here, we are supposed to have CMMA enabled in
* the SIE block. Enabling CMMA works on a per-CPU basis,
@ -1307,20 +1248,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
} else {
mmap_read_lock(vcpu->kvm->mm);
i = __do_essa(vcpu, orc);
mmap_read_unlock(vcpu->kvm->mm);
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
i = __do_essa(vcpu, orc);
if (i < 0)
return i;
/* Account for the possible extra cbrl entry */
entries += i;
}
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
/* reset nceo */
vcpu->arch.sie_block->cbrlo &= PAGE_MASK;
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
mmap_read_lock(gmap->mm);
for (i = 0; i < entries; ++i)
__gmap_zap(gmap, cbrlo[i]);
mmap_read_unlock(gmap->mm);
mmap_read_lock(vcpu->kvm->mm);
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
_essa_clear_cbrl(vcpu, cbrlo, entries);
mmap_read_unlock(vcpu->kvm->mm);
return 0;
}

View File

@ -12,13 +12,16 @@
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gaccess.h"
#include "gmap.h"
#include "faultin.h"
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
@ -34,6 +37,85 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
/**
 * should_export_before_import() - Determine whether an export is needed
 * before an import-like operation.
 * @uvcb: The Ultravisor control block of the UVC to be performed.
 * @mm: The mm of the process.
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * No export is needed also when there is only one protected VM, because the
 * page cannot belong to the wrong VM in that case (there is no "other VM"
 * it can belong to).
 *
 * Return: %true if an export is needed before every import, otherwise %false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	bool misc = uv_has_feature(BIT_UV_FEAT_MISC);

	/* Unpin Page is not a real import, so it never needs an export. */
	if (misc || uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;

	/* Export only when another protected VM could own the page. */
	return atomic_read(&mm->context.protected_count) > 1;
}
/*
 * Context passed through guest_fault->priv while making a guest page
 * secure: carries the UV control block in and the result back out.
 */
struct pv_make_secure {
	void *uvcb;		/* UV control block for the make-secure UVC */
	struct folio *folio;	/* folio stashed on -E2BIG/-EBUSY for splitting */
	int rc;			/* result of the make-secure attempt */
	bool needs_export;	/* export the page before importing it */
};
/*
 * Try to make one folio secure for the protected guest.
 *
 * Called from _kvm_s390_pv_make_secure() with the folio locked.
 * Returns -EFAULT for hugetlb folios, -E2BIG for other large folios
 * (the caller is then expected to split the folio and retry), otherwise
 * the result of __make_folio_secure().
 */
static int __kvm_s390_pv_make_secure(struct guest_fault *f, struct folio *folio)
{
	struct pv_make_secure *priv = f->priv;
	int rc;

	/* Export first so a page shared with another guest can be imported. */
	if (priv->needs_export)
		uv_convert_from_secure(folio_to_phys(folio));
	if (folio_test_hugetlb(folio))
		return -EFAULT;
	if (folio_test_large(folio))
		return -E2BIG;
	/* Hold an extra folio reference across the UVC if the caller has none. */
	if (!f->page)
		folio_get(folio);
	rc = __make_folio_secure(folio, priv->uvcb);
	if (!f->page)
		folio_put(folio);
	return rc;
}
/*
 * Fault-in callback (see kvm_s390_pv_make_secure()): attempt to make the
 * faulted-in page secure.  Reports -EAGAIN via priv->rc when the folio
 * lock cannot be taken; on -E2BIG/-EBUSY it stashes a folio reference so
 * the caller can split the folio and retry.
 */
static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
	struct pv_make_secure *priv = f->priv;
	struct folio *folio;

	folio = pfn_folio(f->pfn);
	priv->rc = -EAGAIN;	/* default result if the trylock fails */
	if (folio_trylock(folio)) {
		priv->rc = __kvm_s390_pv_make_secure(f, folio);
		if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
			/* the extra reference is dropped by the caller */
			priv->folio = folio;
			folio_get(folio);
		}
		folio_unlock(folio);
	}
}
/**
* kvm_s390_pv_make_secure() - make one guest page secure
* @kvm: the guest
@ -45,14 +127,34 @@ EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
*/
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	struct pv_make_secure priv = { .uvcb = uvcb };
	/* Fault in the page writably; the callback performs the UVC. */
	struct guest_fault f = {
		.write_attempt = true,
		.gfn = gpa_to_gfn(gaddr),
		.callback = _kvm_s390_pv_make_secure,
		.priv = &priv,
	};
	int rc;

	lockdep_assert_held(&kvm->srcu);

	priv.needs_export = should_export_before_import(uvcb, kvm->mm);
	scoped_guard(mutex, &kvm->arch.pv.import_lock) {
		rc = kvm_s390_faultin_gfn(NULL, kvm, &f);
		if (!rc) {
			/* fault-in succeeded; report the callback's result */
			rc = priv.rc;
			if (priv.folio) {
				/*
				 * A large folio was found (-E2BIG/-EBUSY):
				 * try to split it, then let the caller retry.
				 */
				rc = s390_wiggle_split_folio(kvm->mm, priv.folio);
				if (!rc)
					rc = -EAGAIN;
			}
		}
	}
	/* Drop the reference taken by the callback, if any. */
	if (priv.folio)
		folio_put(priv.folio);
	return rc;
}
int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
@ -299,35 +401,6 @@ done_fast:
return 0;
}
/**
 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
 * @kvm: the VM whose memory is to be cleared.
 *
 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
 * The CPUs of the protected VM need to be destroyed beforehand.
 */
static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
{
	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
	struct kvm_memory_slot *slot;
	unsigned long len;
	int srcu_idx;

	/* Memslot lookups must happen inside an SRCU read-side section. */
	srcu_idx = srcu_read_lock(&kvm->srcu);

	/* Take the memslot containing guest absolute address 0 */
	slot = gfn_to_memslot(kvm, 0);
	/* Clear all slots or parts thereof that are below 2GB */
	while (slot && slot->base_gfn < pages_2g) {
		/* Destroy only up to the 2GB boundary within this slot. */
		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
		/* Take the next memslot */
		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
	}

	srcu_read_unlock(&kvm->srcu, srcu_idx);
}
static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
struct uv_cb_destroy_fast uvcb = {
@ -342,7 +415,6 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
*rc = uvcb.header.rc;
if (rrc)
*rrc = uvcb.header.rrc;
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
uvcb.header.rc, uvcb.header.rrc);
WARN_ONCE(cc && uvcb.header.rc != 0x104,
@ -391,7 +463,7 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
return -EINVAL;
/* Guest with segment type ASCE, refuse to destroy asynchronously */
if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
if (kvm->arch.gmap->asce.dt == TABLE_TYPE_SEGMENT)
return -EINVAL;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@ -404,8 +476,7 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
priv->stor_var = kvm->arch.pv.stor_var;
priv->stor_base = kvm->arch.pv.stor_base;
priv->handle = kvm_s390_pv_get_handle(kvm);
priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
priv->old_gmap_table = (unsigned long)dereference_asce(kvm->arch.gmap->asce);
if (s390_replace_asce(kvm->arch.gmap))
res = -ENOMEM;
}
@ -415,7 +486,7 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
return res;
}
kvm_s390_destroy_lower_2g(kvm);
gmap_pv_destroy_range(kvm->arch.gmap, 0, gpa_to_gfn(SZ_2G), false);
kvm_s390_clear_pv_state(kvm);
kvm->arch.pv.set_aside = priv;
@ -449,7 +520,6 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
if (!cc) {
atomic_dec(&kvm->mm->context.protected_count);
kvm_s390_pv_dealloc_vm(kvm);
@ -532,7 +602,7 @@ int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
* cleanup has been performed.
*/
if (need_zap && mmget_not_zero(kvm->mm)) {
s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), false);
mmput(kvm->mm);
}
@ -570,7 +640,7 @@ int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
return -EINVAL;
/* When a fatal signal is received, stop immediately */
if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
if (gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), true))
goto done;
if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
ret = -EIO;
@ -609,6 +679,7 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
set_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &kvm->arch.gmap->flags);
}
static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
@ -642,7 +713,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
/* Inputs */
uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
uvcb.guest_stor_len = kvm->arch.pv.guest_len;
uvcb.guest_asce = kvm->arch.gmap->asce;
uvcb.guest_asce = kvm->arch.gmap->asce.val;
uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
uvcb.conf_base_stor_origin =
virt_to_phys((void *)kvm->arch.pv.stor_base);
@ -669,7 +740,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
}
return -EIO;
}
kvm->arch.gmap->guest_handle = uvcb.guest_handle;
return 0;
}
@ -704,26 +774,14 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
.tweak[1] = offset,
};
int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
unsigned long vmaddr;
bool unlocked;
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
if (ret == -ENXIO) {
mmap_read_lock(kvm->mm);
vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
if (kvm_is_error_hva(vmaddr)) {
ret = -EFAULT;
} else {
ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
if (!ret)
ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
}
mmap_read_unlock(kvm->mm);
ret = kvm_s390_faultin_gfn_simple(NULL, kvm, gpa_to_gfn(addr), true);
if (!ret)
return -EAGAIN;
return ret;
}
if (ret && ret != -EAGAIN)

View File

@ -15,7 +15,6 @@
#include <linux/io.h>
#include <linux/mman.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
@ -23,6 +22,7 @@
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "gmap.h"
enum vsie_page_flags {
VSIE_PAGE_IN_USE = 0,
@ -41,8 +41,11 @@ struct vsie_page {
* are reused conditionally, should be accessed via READ_ONCE.
*/
struct kvm_s390_sie_block *scb_o; /* 0x0218 */
/* the shadow gmap in use by the vsie_page */
struct gmap *gmap; /* 0x0220 */
/*
* Flags: must be set/cleared atomically after the vsie page can be
* looked up by other CPUs.
*/
unsigned long flags; /* 0x0220 */
/* address of the last reported fault to guest2 */
unsigned long fault_addr; /* 0x0228 */
/* calculated guest addresses of satellite control blocks */
@ -57,33 +60,14 @@ struct vsie_page {
* radix tree.
*/
gpa_t scb_gpa; /* 0x0258 */
/*
* Flags: must be set/cleared atomically after the vsie page can be
* looked up by other CPUs.
*/
unsigned long flags; /* 0x0260 */
__u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
/* the shadow gmap in use by the vsie_page */
struct gmap_cache gmap_cache; /* 0x0260 */
__u8 reserved[0x0700 - 0x0278]; /* 0x0278 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
/**
 * gmap_shadow_valid() - check if a shadow guest address space matches the
 * given properties and is still valid
 * @sg: pointer to the shadow guest address space structure
 * @asce: ASCE for which the shadow table is requested
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns 1 if the gmap shadow is still valid and matches the given
 * properties, the caller can continue using it. Returns 0 otherwise; the
 * caller has to request a new shadow gmap in this case.
 */
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
{
	/* A removed (unshadowed) gmap can never be reused. */
	if (sg->removed)
		return 0;
	/* The origin ASCE must still be the one we shadowed. */
	if (sg->orig_asce != asce)
		return 0;
	/* ... and the EDAT level must not have changed either. */
	return sg->edat_level == edat_level;
}
static_assert(sizeof(struct vsie_page) == PAGE_SIZE);
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
@ -612,26 +596,17 @@ out:
return rc;
}
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end)
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, gpa_t start, gpa_t end)
{
struct kvm *kvm = gmap->private;
struct vsie_page *cur;
struct vsie_page *cur, *next;
unsigned long prefix;
int i;
if (!gmap_is_shadow(gmap))
return;
KVM_BUG_ON(!test_bit(GMAP_FLAG_SHADOW, &gmap->flags), gmap->kvm);
/*
* Only new shadow blocks are added to the list during runtime,
* therefore we can safely reference them all the time.
*/
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
cur = READ_ONCE(kvm->arch.vsie.pages[i]);
if (!cur)
continue;
if (READ_ONCE(cur->gmap) != gmap)
continue;
list_for_each_entry_safe(cur, next, &gmap->scb_users, gmap_cache.list) {
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
/* with mso/msl, the prefix lies at an offset */
prefix += cur->scb_s.mso;
@ -667,9 +642,9 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct
/* with mso/msl, the prefix lies at offset *mso* */
prefix += scb_s->mso;
rc = kvm_s390_shadow_fault(vcpu, sg, prefix, NULL);
rc = gaccess_shadow_fault(vcpu, sg, prefix, NULL, true);
if (!rc && (scb_s->ecb & ECB_TE))
rc = kvm_s390_shadow_fault(vcpu, sg, prefix + PAGE_SIZE, NULL);
rc = gaccess_shadow_fault(vcpu, sg, prefix + PAGE_SIZE, NULL, true);
/*
* We don't have to mprotect, we will be called for all unshadows.
* SIE will detect if protection applies and trigger a validity.
@ -952,6 +927,7 @@ static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
*/
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
{
bool wr = kvm_s390_cur_gmap_fault_is_write();
int rc;
if ((current->thread.gmap_int_code & PGM_INT_CODE_MASK) == PGM_PROTECTION)
@ -959,11 +935,10 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, stru
return inject_fault(vcpu, PGM_PROTECTION,
current->thread.gmap_teid.addr * PAGE_SIZE, 1);
rc = kvm_s390_shadow_fault(vcpu, sg, current->thread.gmap_teid.addr * PAGE_SIZE, NULL);
rc = gaccess_shadow_fault(vcpu, sg, current->thread.gmap_teid.addr * PAGE_SIZE, NULL, wr);
if (rc > 0) {
rc = inject_fault(vcpu, rc,
current->thread.gmap_teid.addr * PAGE_SIZE,
kvm_s390_cur_gmap_fault_is_write());
current->thread.gmap_teid.addr * PAGE_SIZE, wr);
if (rc >= 0)
vsie_page->fault_addr = current->thread.gmap_teid.addr * PAGE_SIZE;
}
@ -979,7 +954,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, stru
static void handle_last_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
{
if (vsie_page->fault_addr)
kvm_s390_shadow_fault(vcpu, sg, vsie_page->fault_addr, NULL);
gaccess_shadow_fault(vcpu, sg, vsie_page->fault_addr, NULL, true);
vsie_page->fault_addr = 0;
}
@ -1064,8 +1039,9 @@ static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
unsigned long pei_dest, pei_src, src, dest, mask, prefix;
unsigned long src, dest, mask, prefix;
u64 *pei_block = &vsie_page->scb_o->mcic;
union mvpg_pei pei_dest, pei_src;
int edat, rc_dest, rc_src;
union ctlreg0 cr0;
@ -1079,8 +1055,8 @@ static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;
rc_dest = kvm_s390_shadow_fault(vcpu, sg, dest, &pei_dest);
rc_src = kvm_s390_shadow_fault(vcpu, sg, src, &pei_src);
rc_dest = gaccess_shadow_fault(vcpu, sg, dest, &pei_dest, true);
rc_src = gaccess_shadow_fault(vcpu, sg, src, &pei_src, false);
/*
* Either everything went well, or something non-critical went wrong
* e.g. because of a race. In either case, simply retry.
@ -1115,8 +1091,8 @@ static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
}
if (!rc_dest && !rc_src) {
pei_block[0] = pei_dest;
pei_block[1] = pei_src;
pei_block[0] = pei_dest.val;
pei_block[1] = pei_src.val;
return 1;
}
@ -1187,7 +1163,7 @@ xfer_to_guest_mode_check:
goto xfer_to_guest_mode_check;
}
guest_timing_enter_irqoff();
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce);
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val);
guest_timing_exit_irqoff();
local_irq_enable();
}
@ -1237,43 +1213,63 @@ skip_sie:
static void release_gmap_shadow(struct vsie_page *vsie_page)
{
if (vsie_page->gmap)
gmap_put(vsie_page->gmap);
WRITE_ONCE(vsie_page->gmap, NULL);
struct gmap *gmap = vsie_page->gmap_cache.gmap;
lockdep_assert_held(&gmap->kvm->arch.gmap->children_lock);
list_del(&vsie_page->gmap_cache.list);
vsie_page->gmap_cache.gmap = NULL;
prefix_unmapped(vsie_page);
if (list_empty(&gmap->scb_users)) {
gmap_remove_child(gmap);
gmap_put(gmap);
}
}
static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
struct vsie_page *vsie_page)
static struct gmap *acquire_gmap_shadow(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
unsigned long asce;
union ctlreg0 cr0;
struct gmap *gmap;
union asce asce;
int edat;
asce = vcpu->arch.sie_block->gcr[1];
asce.val = vcpu->arch.sie_block->gcr[1];
cr0.val = vcpu->arch.sie_block->gcr[0];
edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
edat += edat && test_kvm_facility(vcpu->kvm, 78);
/*
* ASCE or EDAT could have changed since last icpt, or the gmap
* we're holding has been unshadowed. If the gmap is still valid,
* we can safely reuse it.
*/
if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
vcpu->kvm->stat.gmap_shadow_reuse++;
return 0;
scoped_guard(spinlock, &vcpu->kvm->arch.gmap->children_lock) {
gmap = vsie_page->gmap_cache.gmap;
if (gmap) {
/*
* ASCE or EDAT could have changed since last icpt, or the gmap
* we're holding has been unshadowed. If the gmap is still valid,
* we can safely reuse it.
*/
if (gmap_is_shadow_valid(gmap, asce, edat)) {
vcpu->kvm->stat.gmap_shadow_reuse++;
gmap_get(gmap);
return gmap;
}
/* release the old shadow and mark the prefix as unmapped */
release_gmap_shadow(vsie_page);
}
}
/* release the old shadow - if any, and mark the prefix as unmapped */
release_gmap_shadow(vsie_page);
gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
gmap = gmap_create_shadow(vcpu->arch.mc, vcpu->kvm->arch.gmap, asce, edat);
if (IS_ERR(gmap))
return PTR_ERR(gmap);
vcpu->kvm->stat.gmap_shadow_create++;
WRITE_ONCE(vsie_page->gmap, gmap);
return 0;
return gmap;
scoped_guard(spinlock, &vcpu->kvm->arch.gmap->children_lock) {
/* unlikely race condition, remove the previous shadow */
if (vsie_page->gmap_cache.gmap)
release_gmap_shadow(vsie_page);
vcpu->kvm->stat.gmap_shadow_create++;
list_add(&vsie_page->gmap_cache.list, &gmap->scb_users);
vsie_page->gmap_cache.gmap = gmap;
prefix_unmapped(vsie_page);
gmap_get(gmap);
}
return gmap;
}
/*
@ -1330,8 +1326,11 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
int rc = 0;
while (1) {
rc = acquire_gmap_shadow(vcpu, vsie_page);
sg = vsie_page->gmap;
sg = acquire_gmap_shadow(vcpu, vsie_page);
if (IS_ERR(sg)) {
rc = PTR_ERR(sg);
sg = NULL;
}
if (!rc)
rc = map_prefix(vcpu, vsie_page, sg);
if (!rc) {
@ -1359,6 +1358,9 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
kvm_s390_rewind_psw(vcpu, 4);
break;
}
if (sg)
sg = gmap_put(sg);
cond_resched();
}
if (rc == -EFAULT) {
@ -1455,8 +1457,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
vsie_page->scb_gpa = ULONG_MAX;
/* Double use of the same address or allocation failure. */
if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
vsie_page)) {
if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, vsie_page)) {
put_vsie_page(vsie_page);
mutex_unlock(&kvm->arch.vsie.mutex);
return NULL;
@ -1465,7 +1466,12 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
mutex_unlock(&kvm->arch.vsie.mutex);
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
release_gmap_shadow(vsie_page);
if (vsie_page->gmap_cache.gmap) {
scoped_guard(spinlock, &kvm->arch.gmap->children_lock)
if (vsie_page->gmap_cache.gmap)
release_gmap_shadow(vsie_page);
}
prefix_unmapped(vsie_page);
vsie_page->fault_addr = 0;
vsie_page->scb_s.ihcpu = 0xffffU;
return vsie_page;
@ -1543,8 +1549,10 @@ void kvm_s390_vsie_destroy(struct kvm *kvm)
mutex_lock(&kvm->arch.vsie.mutex);
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
vsie_page = kvm->arch.vsie.pages[i];
scoped_guard(spinlock, &kvm->arch.gmap->children_lock)
if (vsie_page->gmap_cache.gmap)
release_gmap_shadow(vsie_page);
kvm->arch.vsie.pages[i] = NULL;
release_gmap_shadow(vsie_page);
/* free the radix tree entry */
if (vsie_page->scb_gpa != ULONG_MAX)
radix_tree_delete(&kvm->arch.vsie.addr_to_page,

View File

@ -34,136 +34,19 @@ void debug_user_asce(int exit)
}
#endif /*CONFIG_DEBUG_ENTRY */
/*
 * Operand-access control word, loaded into %r0 for the MVCOS instruction
 * (see raw_copy_{from,to}_user_key() below).  oac1 describes the first
 * (destination) operand, oac2 the second (source) operand.
 */
union oac {
	unsigned int val;
	struct {
		struct {
			unsigned short key : 4;	/* storage access key */
			unsigned short : 4;
			unsigned short as : 2;	/* address space indication */
			unsigned short : 4;
			unsigned short k : 1;	/* 1: the key field is used */
			unsigned short a : 1;	/* 1: the as field is used */
		} oac1;
		struct {
			unsigned short key : 4;	/* storage access key */
			unsigned short : 4;
			unsigned short as : 2;	/* address space indication */
			unsigned short : 4;
			unsigned short k : 1;	/* 1: the key field is used */
			unsigned short a : 1;	/* 1: the as field is used */
		} oac2;
	};
};
/*
 * Copy from user space honouring a storage access key, using the MVCOS
 * instruction with the key applied to the source (second) operand.
 *
 * Callers treat the return value as the number of bytes NOT copied
 * (see _copy_from_user_key()); on a fault the loop skips one page and
 * retries the remainder.
 * NOTE(review): the exact size bookkeeping relies on the MVCOS
 * exception-table fixups (EX_TABLE_UA_MVCOS_FROM) — verify against the
 * fixup implementation.
 */
static uaccess_kmsan_or_inline __must_check unsigned long
raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
{
	unsigned long osize;
	/* operand-access control: key-checked secondary-space source */
	union oac spec = {
		.oac2.key = key,
		.oac2.as = PSW_BITS_AS_SECONDARY,
		.oac2.k = 1,
		.oac2.a = 1,
	};
	int cc;

	while (1) {
		osize = size;	/* size before this attempt */
		asm_inline volatile(
			" lr %%r0,%[spec]\n"
			"0: mvcos %[to],%[from],%[size]\n"
			"1: nopr %%r7\n"
			CC_IPM(cc)
			EX_TABLE_UA_MVCOS_FROM(0b, 0b)
			EX_TABLE_UA_MVCOS_FROM(1b, 0b)
			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
			: [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from)
			: CC_CLOBBER_LIST("memory", "0"));
		if (CC_TRANSFORM(cc) == 0)
			return osize - size;
		/* fault: step past the failing page and retry */
		size -= 4096;
		to += 4096;
		from += 4096;
	}
}
/*
 * Copy from user space with a specific storage access key.
 *
 * Returns the number of bytes that could not be copied; any tail of the
 * destination buffer that was not written is cleared to zero.
 */
unsigned long _copy_from_user_key(void *to, const void __user *from,
				  unsigned long n, unsigned long key)
{
	unsigned long uncopied;

	might_fault();
	if (should_fail_usercopy()) {
		/* injected failure: nothing copied, zero the whole buffer */
		memset(to, 0, n);
		return n;
	}
	instrument_copy_from_user_before(to, from, n);
	uncopied = raw_copy_from_user_key(to, from, n, key);
	instrument_copy_from_user_after(to, from, n, uncopied);
	if (unlikely(uncopied))
		memset(to + (n - uncopied), 0, uncopied);
	return uncopied;
}
EXPORT_SYMBOL(_copy_from_user_key);
/*
 * Copy to user space honouring a storage access key, using the MVCOS
 * instruction with the key applied to the destination (first) operand.
 *
 * Callers treat the return value as the number of bytes NOT copied
 * (see _copy_to_user_key()); on a fault the loop skips one page and
 * retries the remainder.
 * NOTE(review): the exact size bookkeeping relies on the MVCOS
 * exception-table fixups (EX_TABLE_UA_MVCOS_TO) — verify against the
 * fixup implementation.
 */
static uaccess_kmsan_or_inline __must_check unsigned long
raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
{
	unsigned long osize;
	/* operand-access control: key-checked secondary-space destination */
	union oac spec = {
		.oac1.key = key,
		.oac1.as = PSW_BITS_AS_SECONDARY,
		.oac1.k = 1,
		.oac1.a = 1,
	};
	int cc;

	while (1) {
		osize = size;	/* size before this attempt */
		asm_inline volatile(
			" lr %%r0,%[spec]\n"
			"0: mvcos %[to],%[from],%[size]\n"
			"1: nopr %%r7\n"
			CC_IPM(cc)
			EX_TABLE_UA_MVCOS_TO(0b, 0b)
			EX_TABLE_UA_MVCOS_TO(1b, 0b)
			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
			: [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
			: CC_CLOBBER_LIST("memory", "0"));
		if (CC_TRANSFORM(cc) == 0)
			return osize - size;
		/* fault: step past the failing page and retry */
		size -= 4096;
		to += 4096;
		from += 4096;
	}
}
/*
 * Copy to user space with a specific storage access key.
 *
 * Returns the number of bytes that could not be copied (0 on success).
 */
unsigned long _copy_to_user_key(void __user *to, const void *from,
				unsigned long n, unsigned long key)
{
	might_fault();
	/* injected failure: report that nothing was copied */
	if (should_fail_usercopy())
		return n;
	instrument_copy_to_user(to, from, n);
	return raw_copy_to_user_key(to, from, n, key);
}
EXPORT_SYMBOL(_copy_to_user_key);
#define CMPXCHG_USER_KEY_MAX_LOOPS 128
static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsigned int *uval,
unsigned int old, unsigned int new,
unsigned int mask, unsigned long key)
static nokprobe_inline int __cmpxchg_key_small(void *address, unsigned int *uval,
unsigned int old, unsigned int new,
unsigned int mask, unsigned long key)
{
unsigned long count;
unsigned int prev;
bool sacf_flag;
int rc = 0;
skey_regions_initialize();
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
"20: spka 0(%[key])\n"
" sacf 256\n"
" llill %[count],%[max_loops]\n"
"0: l %[prev],%[address]\n"
"1: nr %[prev],%[mask]\n"
@ -178,8 +61,7 @@ static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsig
" nr %[tmp],%[mask]\n"
" jnz 5f\n"
" brct %[count],2b\n"
"5: sacf 768\n"
" spka %[default_key]\n"
"5: spka %[default_key]\n"
"21:\n"
EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
@ -197,16 +79,16 @@ static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsig
[default_key] "J" (PAGE_DEFAULT_KEY),
[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*uval = prev;
if (!count)
rc = -EAGAIN;
return rc;
}
int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
unsigned char old, unsigned char new, unsigned long key)
int __kprobes __cmpxchg_key1(void *addr, unsigned char *uval, unsigned char old,
unsigned char new, unsigned long key)
{
unsigned long address = (unsigned long)addr;
unsigned int prev, shift, mask, _old, _new;
int rc;
@ -215,15 +97,16 @@ int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
_old = (unsigned int)old << shift;
_new = (unsigned int)new << shift;
mask = ~(0xff << shift);
rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
rc = __cmpxchg_key_small((void *)address, &prev, _old, _new, mask, key);
*uval = prev >> shift;
return rc;
}
EXPORT_SYMBOL(__cmpxchg_user_key1);
EXPORT_SYMBOL(__cmpxchg_key1);
int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
unsigned short old, unsigned short new, unsigned long key)
int __kprobes __cmpxchg_key2(void *addr, unsigned short *uval, unsigned short old,
unsigned short new, unsigned long key)
{
unsigned long address = (unsigned long)addr;
unsigned int prev, shift, mask, _old, _new;
int rc;
@ -232,27 +115,23 @@ int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
_old = (unsigned int)old << shift;
_new = (unsigned int)new << shift;
mask = ~(0xffff << shift);
rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
rc = __cmpxchg_key_small((void *)address, &prev, _old, _new, mask, key);
*uval = prev >> shift;
return rc;
}
EXPORT_SYMBOL(__cmpxchg_user_key2);
EXPORT_SYMBOL(__cmpxchg_key2);
int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
unsigned int old, unsigned int new, unsigned long key)
int __kprobes __cmpxchg_key4(void *address, unsigned int *uval, unsigned int old,
unsigned int new, unsigned long key)
{
unsigned int prev = old;
bool sacf_flag;
int rc = 0;
skey_regions_initialize();
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
"20: spka 0(%[key])\n"
" sacf 256\n"
"0: cs %[prev],%[new],%[address]\n"
"1: sacf 768\n"
" spka %[default_key]\n"
"1: spka %[default_key]\n"
"21:\n"
EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
@ -264,27 +143,22 @@ int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*uval = prev;
return rc;
}
EXPORT_SYMBOL(__cmpxchg_user_key4);
EXPORT_SYMBOL(__cmpxchg_key4);
int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
unsigned long old, unsigned long new, unsigned long key)
int __kprobes __cmpxchg_key8(void *address, unsigned long *uval, unsigned long old,
unsigned long new, unsigned long key)
{
unsigned long prev = old;
bool sacf_flag;
int rc = 0;
skey_regions_initialize();
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
"20: spka 0(%[key])\n"
" sacf 256\n"
"0: csg %[prev],%[new],%[address]\n"
"1: sacf 768\n"
" spka %[default_key]\n"
"1: spka %[default_key]\n"
"21:\n"
EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
@ -296,27 +170,22 @@ int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*uval = prev;
return rc;
}
EXPORT_SYMBOL(__cmpxchg_user_key8);
EXPORT_SYMBOL(__cmpxchg_key8);
int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
__uint128_t old, __uint128_t new, unsigned long key)
int __kprobes __cmpxchg_key16(void *address, __uint128_t *uval, __uint128_t old,
__uint128_t new, unsigned long key)
{
__uint128_t prev = old;
bool sacf_flag;
int rc = 0;
skey_regions_initialize();
sacf_flag = enable_sacf_uaccess();
asm_inline volatile(
"20: spka 0(%[key])\n"
" sacf 256\n"
"0: cdsg %[prev],%[new],%[address]\n"
"1: sacf 768\n"
" spka %[default_key]\n"
"1: spka %[default_key]\n"
"21:\n"
EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
@ -328,8 +197,7 @@ int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
[key] "a" (key << 4),
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
disable_sacf_uaccess(sacf_flag);
*uval = prev;
return rc;
}
EXPORT_SYMBOL(__cmpxchg_user_key16);
EXPORT_SYMBOL(__cmpxchg_key16);

View File

@ -34,28 +34,6 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
free_swap_and_cache(entry);
}
/*
 * Acquire the per-pte PGSTE lock by atomically setting PGSTE_PCL_BIT in
 * the pgste, which is located PTRS_PER_PTE entries after the pte itself.
 * Spins until the bit was previously clear.  Without CONFIG_PGSTE this
 * is a no-op that returns an empty pgste.
 */
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long value = 0;
#ifdef CONFIG_PGSTE
	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);

	do {
		/* try to set the lock bit; loop while it was already set */
		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
	} while (value & PGSTE_PCL_BIT);
	value |= PGSTE_PCL_BIT;
#endif
	return __pgste(value);
}
/*
 * Release the per-pte PGSTE lock: store the pgste value back with
 * PGSTE_PCL_BIT cleared.  The compiler barrier keeps preceding pgste
 * updates ordered before the unlocking store.
 */
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	barrier();
	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}
/**
* gmap_helper_zap_one_page() - discard a page if it was swapped.
* @mm: the mm
@ -68,9 +46,7 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
struct vm_area_struct *vma;
unsigned long pgstev;
spinlock_t *ptl;
pgste_t pgste;
pte_t *ptep;
mmap_assert_locked(mm);
@ -85,18 +61,8 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
if (unlikely(!ptep))
return;
if (pte_swap(*ptep)) {
preempt_disable();
pgste = pgste_get_lock(ptep);
pgstev = pgste_val(pgste);
if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
(pgstev & _PGSTE_GPS_ZERO)) {
ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
pte_clear(mm, vmaddr, ptep);
}
pgste_set_unlock(ptep, pgste);
preempt_enable();
ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
pte_clear(mm, vmaddr, ptep);
}
pte_unmap_unlock(ptep, ptl);
}