s390:
* Lots of small and not-so-small fixes for the newly rewritten gmap, mostly affecting the handling of nested guests. x86: * Fix an issue with shadow paging, which causes KVM to install an MMIO PTE in the shadow page tables without first zapping a non-MMIO SPTE if KVM didn't see the write that modified the shadowed guest PTE. While commit a54aa15c6b was right about it being impossible to miss such a write if it was coming from the guest, it failed to account for writes to guest memory that are outside the scope of KVM: if userspace modifies the guest PTE, and then the guest hits a relevant page fault, KVM will get confused. -----BEGIN PGP SIGNATURE----- iQFIBAABCgAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmnH3j8UHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroOWRQf7BD1dgyO9Id+Y/QQJPzZ0z/zGbNWT jLDTpapxSB960AybvmkOl0pgr7AJrNN+iWQ5cbod/41NKEdJn++ME++NFQlt15oH gZAMdVr72qklyVFOq3BZhQRskleGo35A/YYznKf+re4tdvL5fynyYTLDwVkDR4NU tCwHCg+B6bVSNOLjxMm5eOpDXoboGiwohFYay7IclsXibjDlKyFaj9mZPJW1E6qy SUp+nuseUTf8RFFscNTsW6XRPa/Y7RctPBNQuGSiw3rxFXsq+VyD6Y/AOklbdeyz 8u+25gdKm65sdXFmLWIN1Ogec0DcKMgdNpFrgEj+9PPWyHDHikqksv/vRw== =/YA7 -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "s390: - Lots of small and not-so-small fixes for the newly rewritten gmap, mostly affecting the handling of nested guests. x86: - Fix an issue with shadow paging, which causes KVM to install an MMIO PTE in the shadow page tables without first zapping a non-MMIO SPTE if KVM didn't see the write that modified the shadowed guest PTE. 
While commit a54aa15c6b ("KVM: x86/mmu: Handle MMIO SPTEs directly in mmu_set_spte()") was right about it being impossible to miss such a write if it was coming from the guest, it failed to account for writes to guest memory that are outside the scope of KVM: if userspace modifies the guest PTE, and then the guest hits a relevant page fault, KVM will get confused" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86/mmu: Only WARN in direct MMUs when overwriting shadow-present SPTE KVM: x86/mmu: Drop/zap existing present SPTE even when creating an MMIO SPTE KVM: s390: Fix KVM_S390_VCPU_FAULT ioctl KVM: s390: vsie: Fix guest page tables protection KVM: s390: vsie: Fix unshadowing while shadowing KVM: s390: vsie: Fix refcount overflow for shadow gmaps KVM: s390: vsie: Fix nested guest memory shadowing KVM: s390: Correctly handle guest mappings without struct page KVM: s390: Fix gmap_link() KVM: s390: vsie: Fix check for pre-existing shadow mapping KVM: s390: Remove non-atomic dat_crstep_xchg() KVM: s390: vsie: Fix dat_split_ste()
This commit is contained in:
commit
ac354b5cb0
|
|
@ -134,32 +134,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dat_crstep_xchg() - Exchange a gmap CRSTE with another.
|
||||
* @crstep: Pointer to the CRST entry
|
||||
* @new: Replacement entry.
|
||||
* @gfn: The affected guest address.
|
||||
* @asce: The ASCE of the address space.
|
||||
*
|
||||
* Context: This function is assumed to be called with kvm->mmu_lock held.
|
||||
*/
|
||||
void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
|
||||
{
|
||||
if (crstep->h.i) {
|
||||
WRITE_ONCE(*crstep, new);
|
||||
return;
|
||||
} else if (cpu_has_edat2()) {
|
||||
crdte_crste(crstep, *crstep, new, gfn, asce);
|
||||
return;
|
||||
}
|
||||
|
||||
if (machine_has_tlb_guest())
|
||||
idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
|
||||
else
|
||||
idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
|
||||
WRITE_ONCE(*crstep, new);
|
||||
}
|
||||
|
||||
/**
|
||||
* dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
|
||||
* @crstep: Pointer to the CRST entry.
|
||||
|
|
@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce
|
|||
*
|
||||
* Return: %true if the exchange was successful.
|
||||
*/
|
||||
bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
|
||||
union asce asce)
|
||||
bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
|
||||
gfn_t gfn, union asce asce)
|
||||
{
|
||||
if (old.h.i)
|
||||
return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
|
||||
|
|
@ -292,6 +266,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
|
|||
pt->ptes[i].val = init.val | i * PAGE_SIZE;
|
||||
/* No need to take locks as the page table is not installed yet. */
|
||||
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
|
||||
pgste_init.vsie_notif = old.s.fc1.vsie_notif;
|
||||
pgste_init.pcl = uses_skeys && init.h.i;
|
||||
dat_init_pgstes(pt, pgste_init.val);
|
||||
} else {
|
||||
|
|
@ -893,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d
|
|||
|
||||
/* This table entry needs to be updated. */
|
||||
if (walk->start <= gfn && walk->end >= next) {
|
||||
dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
|
||||
if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce))
|
||||
return -EINVAL;
|
||||
/* A lower level table was present, needs to be freed. */
|
||||
if (!crste.h.fc && !crste.h.i) {
|
||||
if (is_pmd(crste))
|
||||
|
|
@ -1021,67 +997,21 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
|
|||
return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
|
||||
}
|
||||
|
||||
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
|
||||
bool uses_skeys, struct guest_fault *f)
|
||||
{
|
||||
union crste oldval, newval;
|
||||
union pte newpte, oldpte;
|
||||
union pgste pgste;
|
||||
int rc = 0;
|
||||
|
||||
rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
|
||||
if (rc == -EINVAL || rc == -ENOMEM)
|
||||
return rc;
|
||||
if (rc)
|
||||
return -EAGAIN;
|
||||
|
||||
if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
|
||||
return -EINVAL;
|
||||
|
||||
if (f->ptep) {
|
||||
pgste = pgste_get_lock(f->ptep);
|
||||
oldpte = *f->ptep;
|
||||
newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
|
||||
newpte.s.sd = oldpte.s.sd;
|
||||
oldpte.s.sd = 0;
|
||||
if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
|
||||
pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys);
|
||||
if (f->callback)
|
||||
f->callback(f);
|
||||
} else {
|
||||
rc = -EAGAIN;
|
||||
}
|
||||
pgste_set_unlock(f->ptep, pgste);
|
||||
} else {
|
||||
oldval = READ_ONCE(*f->crstep);
|
||||
newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
|
||||
f->write_attempt | oldval.s.fc1.d);
|
||||
newval.s.fc1.sd = oldval.s.fc1.sd;
|
||||
if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
|
||||
crste_origin_large(oldval) != crste_origin_large(newval))
|
||||
return -EAGAIN;
|
||||
if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce))
|
||||
return -EAGAIN;
|
||||
if (f->callback)
|
||||
f->callback(f);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
|
||||
{
|
||||
union crste crste = READ_ONCE(*crstep);
|
||||
union crste newcrste, oldcrste;
|
||||
int *n = walk->priv;
|
||||
|
||||
if (!crste.h.fc || crste.h.i || crste.h.p)
|
||||
return 0;
|
||||
|
||||
do {
|
||||
oldcrste = READ_ONCE(*crstep);
|
||||
if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p)
|
||||
return 0;
|
||||
if (oldcrste.s.fc1.prefix_notif)
|
||||
break;
|
||||
newcrste = oldcrste;
|
||||
newcrste.s.fc1.prefix_notif = 1;
|
||||
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce));
|
||||
*n = 2;
|
||||
if (crste.s.fc1.prefix_notif)
|
||||
return 0;
|
||||
crste.s.fc1.prefix_notif = 1;
|
||||
dat_crstep_xchg(crstep, crste, gfn, walk->asce);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -160,14 +160,14 @@ union pmd {
|
|||
unsigned long :44; /* HW */
|
||||
unsigned long : 3; /* Unused */
|
||||
unsigned long : 1; /* HW */
|
||||
unsigned long s : 1; /* Special */
|
||||
unsigned long w : 1; /* Writable soft-bit */
|
||||
unsigned long r : 1; /* Readable soft-bit */
|
||||
unsigned long d : 1; /* Dirty */
|
||||
unsigned long y : 1; /* Young */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long : 3; /* HW */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
|
||||
unsigned long : 1; /* Unused */
|
||||
unsigned long : 4; /* HW */
|
||||
unsigned long sd : 1; /* Soft-Dirty */
|
||||
unsigned long pr : 1; /* Present */
|
||||
|
|
@ -183,14 +183,14 @@ union pud {
|
|||
unsigned long :33; /* HW */
|
||||
unsigned long :14; /* Unused */
|
||||
unsigned long : 1; /* HW */
|
||||
unsigned long s : 1; /* Special */
|
||||
unsigned long w : 1; /* Writable soft-bit */
|
||||
unsigned long r : 1; /* Readable soft-bit */
|
||||
unsigned long d : 1; /* Dirty */
|
||||
unsigned long y : 1; /* Young */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long : 3; /* HW */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
|
||||
unsigned long : 1; /* Unused */
|
||||
unsigned long : 4; /* HW */
|
||||
unsigned long sd : 1; /* Soft-Dirty */
|
||||
unsigned long pr : 1; /* Present */
|
||||
|
|
@ -254,14 +254,14 @@ union crste {
|
|||
struct {
|
||||
unsigned long :47;
|
||||
unsigned long : 1; /* HW (should be 0) */
|
||||
unsigned long s : 1; /* Special */
|
||||
unsigned long w : 1; /* Writable */
|
||||
unsigned long r : 1; /* Readable */
|
||||
unsigned long d : 1; /* Dirty */
|
||||
unsigned long y : 1; /* Young */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long : 3; /* HW */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
|
||||
unsigned long : 1;
|
||||
unsigned long : 4; /* HW */
|
||||
unsigned long sd : 1; /* Soft-Dirty */
|
||||
unsigned long pr : 1; /* Present */
|
||||
|
|
@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf
|
|||
u16 type, u16 param);
|
||||
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
|
||||
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
|
||||
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
|
||||
bool uses_skeys, struct guest_fault *f);
|
||||
|
||||
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
|
||||
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
|
||||
|
|
@ -938,11 +936,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu
|
|||
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
|
||||
}
|
||||
|
||||
static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
|
||||
static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
|
||||
{
|
||||
union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
|
||||
union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
|
||||
|
||||
dat_crstep_xchg(crstep, newcrste, gfn, asce);
|
||||
do {
|
||||
oldcrste = READ_ONCE(*crstep);
|
||||
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
|
||||
return oldcrste;
|
||||
}
|
||||
|
||||
static inline int get_level(union crste *crstep, union pte *ptep)
|
||||
|
|
|
|||
|
|
@ -1436,13 +1436,21 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
|
|||
|
||||
if (!pgste_get_trylock(ptep_h, &pgste))
|
||||
return -EAGAIN;
|
||||
newpte = _pte(f->pfn, f->writable, !p, 0);
|
||||
newpte.s.d |= ptep->s.d;
|
||||
newpte.s.sd |= ptep->s.sd;
|
||||
newpte.h.p &= ptep->h.p;
|
||||
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
|
||||
pgste.vsie_notif = 1;
|
||||
newpte = _pte(f->pfn, f->writable, !p, ptep_h->s.s);
|
||||
newpte.s.d |= ptep_h->s.d;
|
||||
newpte.s.sd |= ptep_h->s.sd;
|
||||
newpte.h.p &= ptep_h->h.p;
|
||||
if (!newpte.h.p && !f->writable) {
|
||||
rc = -EOPNOTSUPP;
|
||||
} else {
|
||||
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
|
||||
pgste.vsie_notif = 1;
|
||||
}
|
||||
pgste_set_unlock(ptep_h, pgste);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (!sg->parent)
|
||||
return -EAGAIN;
|
||||
|
||||
newpte = _pte(f->pfn, 0, !p, 0);
|
||||
if (!pgste_get_trylock(ptep, &pgste))
|
||||
|
|
@ -1456,7 +1464,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
|
|||
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
|
||||
struct guest_fault *f, bool p)
|
||||
{
|
||||
union crste newcrste;
|
||||
union crste newcrste, oldcrste;
|
||||
gfn_t gfn;
|
||||
int rc;
|
||||
|
||||
|
|
@ -1469,16 +1477,28 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
|
|||
if (rc)
|
||||
return rc;
|
||||
|
||||
newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
|
||||
newcrste.s.fc1.d |= host->s.fc1.d;
|
||||
newcrste.s.fc1.sd |= host->s.fc1.sd;
|
||||
newcrste.h.p &= host->h.p;
|
||||
newcrste.s.fc1.vsie_notif = 1;
|
||||
newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
|
||||
_gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);
|
||||
do {
|
||||
/* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */
|
||||
if (!sg->parent)
|
||||
return -EAGAIN;
|
||||
oldcrste = READ_ONCE(*host);
|
||||
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
|
||||
newcrste.s.fc1.d |= oldcrste.s.fc1.d;
|
||||
newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
|
||||
newcrste.h.p &= oldcrste.h.p;
|
||||
newcrste.s.fc1.vsie_notif = 1;
|
||||
newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
|
||||
newcrste.s.fc1.s = oldcrste.s.fc1.s;
|
||||
if (!newcrste.h.p && !f->writable)
|
||||
return -EOPNOTSUPP;
|
||||
} while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
|
||||
if (!sg->parent)
|
||||
return -EAGAIN;
|
||||
|
||||
newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
|
||||
dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
|
||||
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
|
||||
gfn = gpa_to_gfn(raddr);
|
||||
while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -1502,21 +1522,31 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
|
|||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* A race occourred. The shadow mapping is already valid, nothing to do */
|
||||
if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table)))
|
||||
/* A race occurred. The shadow mapping is already valid, nothing to do */
|
||||
if ((ptep && !ptep->h.i && ptep->h.p == w->p) ||
|
||||
(!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
|
||||
return 0;
|
||||
|
||||
gl = get_level(table, ptep);
|
||||
|
||||
/* In case of a real address space */
|
||||
if (w->level <= LEVEL_MEM) {
|
||||
l = TABLE_TYPE_PAGE_TABLE;
|
||||
hl = TABLE_TYPE_REGION1;
|
||||
goto real_address_space;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip levels that are already protected. For each level, protect
|
||||
* only the page containing the entry, not the whole table.
|
||||
*/
|
||||
for (i = gl ; i >= w->level; i--) {
|
||||
rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr),
|
||||
entries[i - 1].pfn, i, entries[i - 1].writable);
|
||||
rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr),
|
||||
entries[i].pfn, i + 1, entries[i].writable);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (!sg->parent)
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
|
||||
|
|
@ -1528,6 +1558,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
|
|||
/* Get the smallest granularity */
|
||||
l = min3(gl, hl, w->level);
|
||||
|
||||
real_address_space:
|
||||
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
|
||||
/* If necessary, create the shadow mapping */
|
||||
if (l < gl) {
|
||||
|
|
|
|||
|
|
@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
|
|||
struct clear_young_pte_priv *priv = walk->priv;
|
||||
union crste crste, new;
|
||||
|
||||
crste = READ_ONCE(*crstep);
|
||||
do {
|
||||
crste = READ_ONCE(*crstep);
|
||||
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (!crste.s.fc1.y && crste.h.i)
|
||||
return 0;
|
||||
if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end))
|
||||
break;
|
||||
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (!crste.s.fc1.y && crste.h.i)
|
||||
return 0;
|
||||
if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) {
|
||||
new = crste;
|
||||
new.h.i = 1;
|
||||
new.s.fc1.y = 0;
|
||||
|
|
@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
|
|||
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
|
||||
new.s.fc1.d = 0;
|
||||
new.h.p = 1;
|
||||
dat_crstep_xchg(crstep, new, gfn, walk->asce);
|
||||
}
|
||||
} while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce));
|
||||
|
||||
priv->young = 1;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
|
|||
{
|
||||
struct gmap_unmap_priv *priv = walk->priv;
|
||||
struct folio *folio = NULL;
|
||||
union crste old = *crstep;
|
||||
|
||||
if (crstep->h.fc) {
|
||||
if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
|
||||
folio = phys_to_folio(crste_origin_large(*crstep));
|
||||
gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn);
|
||||
if (folio)
|
||||
uv_convert_from_secure_folio(folio);
|
||||
}
|
||||
if (!old.h.fc)
|
||||
return 0;
|
||||
|
||||
if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
|
||||
folio = phys_to_folio(crste_origin_large(old));
|
||||
/* No races should happen because kvm->mmu_lock is held in write mode */
|
||||
KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
|
||||
priv->gmap->kvm);
|
||||
if (folio)
|
||||
uv_convert_from_secure_folio(folio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t
|
|||
|
||||
if (fatal_signal_pending(current))
|
||||
return 1;
|
||||
crste = READ_ONCE(*table);
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (crste.h.p && !crste.s.fc1.sd)
|
||||
return 0;
|
||||
do {
|
||||
crste = READ_ONCE(*table);
|
||||
if (!crste.h.fc)
|
||||
return 0;
|
||||
if (crste.h.p && !crste.s.fc1.sd)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If this large page contains one or more prefixes of vCPUs that are
|
||||
* currently running, do not reset the protection, leave it marked as
|
||||
* dirty.
|
||||
*/
|
||||
if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) {
|
||||
/*
|
||||
* If this large page contains one or more prefixes of vCPUs that are
|
||||
* currently running, do not reset the protection, leave it marked as
|
||||
* dirty.
|
||||
*/
|
||||
if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end))
|
||||
break;
|
||||
new = crste;
|
||||
new.h.p = 1;
|
||||
new.s.fc1.sd = 0;
|
||||
gmap_crstep_xchg(gmap, table, new, gfn);
|
||||
}
|
||||
} while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn));
|
||||
|
||||
for ( ; gfn < end; gfn++)
|
||||
mark_page_dirty(gmap->kvm, gfn);
|
||||
|
|
@ -511,7 +519,7 @@ void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end)
|
|||
_dat_walk_gfn_range(start, end, gmap->asce, &walk_ops, 0, gmap);
|
||||
}
|
||||
|
||||
static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
|
||||
static int gmap_handle_minor_crste_fault(struct gmap *gmap, struct guest_fault *f)
|
||||
{
|
||||
union crste newcrste, oldcrste = READ_ONCE(*f->crstep);
|
||||
|
||||
|
|
@ -536,10 +544,8 @@ static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
|
|||
newcrste.s.fc1.d = 1;
|
||||
newcrste.s.fc1.sd = 1;
|
||||
}
|
||||
if (!oldcrste.s.fc1.d && newcrste.s.fc1.d)
|
||||
SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
|
||||
/* In case of races, let the slow path deal with it. */
|
||||
return !dat_crstep_xchg_atomic(f->crstep, oldcrste, newcrste, f->gfn, asce);
|
||||
return !gmap_crstep_xchg_atomic(gmap, f->crstep, oldcrste, newcrste, f->gfn);
|
||||
}
|
||||
/* Trying to write on a read-only page, let the slow path deal with it. */
|
||||
return 1;
|
||||
|
|
@ -568,8 +574,6 @@ static int _gmap_handle_minor_pte_fault(struct gmap *gmap, union pgste *pgste,
|
|||
newpte.s.d = 1;
|
||||
newpte.s.sd = 1;
|
||||
}
|
||||
if (!oldpte.s.d && newpte.s.d)
|
||||
SetPageDirty(pfn_to_page(newpte.h.pfra));
|
||||
*pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, *pgste, f->gfn);
|
||||
|
||||
return 0;
|
||||
|
|
@ -606,7 +610,7 @@ int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault)
|
|||
fault->callback(fault);
|
||||
pgste_set_unlock(fault->ptep, pgste);
|
||||
} else {
|
||||
rc = gmap_handle_minor_crste_fault(gmap->asce, fault);
|
||||
rc = gmap_handle_minor_crste_fault(gmap, fault);
|
||||
if (!rc && fault->callback)
|
||||
fault->callback(fault);
|
||||
}
|
||||
|
|
@ -623,10 +627,61 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn)
|
|||
return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags);
|
||||
}
|
||||
|
||||
static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level,
|
||||
struct guest_fault *f)
|
||||
{
|
||||
union crste oldval, newval;
|
||||
union pte newpte, oldpte;
|
||||
union pgste pgste;
|
||||
int rc = 0;
|
||||
|
||||
rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level,
|
||||
&f->crstep, &f->ptep);
|
||||
if (rc == -ENOMEM)
|
||||
return rc;
|
||||
if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm))
|
||||
return rc;
|
||||
if (rc)
|
||||
return -EAGAIN;
|
||||
if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm))
|
||||
return -EINVAL;
|
||||
|
||||
if (f->ptep) {
|
||||
pgste = pgste_get_lock(f->ptep);
|
||||
oldpte = *f->ptep;
|
||||
newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
|
||||
newpte.s.sd = oldpte.s.sd;
|
||||
oldpte.s.sd = 0;
|
||||
if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
|
||||
pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn);
|
||||
if (f->callback)
|
||||
f->callback(f);
|
||||
} else {
|
||||
rc = -EAGAIN;
|
||||
}
|
||||
pgste_set_unlock(f->ptep, pgste);
|
||||
} else {
|
||||
do {
|
||||
oldval = READ_ONCE(*f->crstep);
|
||||
newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
|
||||
f->write_attempt | oldval.s.fc1.d);
|
||||
newval.s.fc1.s = !f->page;
|
||||
newval.s.fc1.sd = oldval.s.fc1.sd;
|
||||
if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
|
||||
crste_origin_large(oldval) != crste_origin_large(newval))
|
||||
return -EAGAIN;
|
||||
} while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn));
|
||||
if (f->callback)
|
||||
f->callback(f);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f)
|
||||
{
|
||||
unsigned int order;
|
||||
int rc, level;
|
||||
int level;
|
||||
|
||||
lockdep_assert_held(&gmap->kvm->mmu_lock);
|
||||
|
||||
|
|
@ -638,16 +693,14 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
|
|||
else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn))
|
||||
level = TABLE_TYPE_SEGMENT;
|
||||
}
|
||||
rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f);
|
||||
KVM_BUG_ON(rc == -EINVAL, gmap->kvm);
|
||||
return rc;
|
||||
return _gmap_link(mc, gmap, level, f);
|
||||
}
|
||||
|
||||
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
|
||||
gfn_t p_gfn, gfn_t c_gfn, bool force_alloc)
|
||||
{
|
||||
union crste newcrste, oldcrste;
|
||||
struct page_table *pt;
|
||||
union crste newcrste;
|
||||
union crste *crstep;
|
||||
union pte *ptep;
|
||||
int rc;
|
||||
|
|
@ -673,7 +726,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
|
|||
&crstep, &ptep);
|
||||
if (rc)
|
||||
return rc;
|
||||
dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce);
|
||||
do {
|
||||
oldcrste = READ_ONCE(*crstep);
|
||||
if (oldcrste.val == newcrste.val)
|
||||
break;
|
||||
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -777,8 +834,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn)
|
|||
int rc;
|
||||
|
||||
rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep);
|
||||
if (!rc)
|
||||
dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce);
|
||||
if (rc)
|
||||
return;
|
||||
while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce))
|
||||
;
|
||||
}
|
||||
|
||||
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count)
|
||||
|
|
@ -1017,8 +1076,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level)
|
|||
dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg));
|
||||
return;
|
||||
}
|
||||
crste = READ_ONCE(*crstep);
|
||||
dat_crstep_clear(crstep, r_gfn, sg->asce);
|
||||
|
||||
crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce);
|
||||
if (crste_leaf(crste) || crste.h.i)
|
||||
return;
|
||||
if (is_pmd(crste))
|
||||
|
|
@ -1101,6 +1160,7 @@ struct gmap_protect_asce_top_level {
|
|||
static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
|
||||
struct gmap_protect_asce_top_level *context)
|
||||
{
|
||||
struct gmap *parent;
|
||||
int rc, i;
|
||||
|
||||
guard(write_lock)(&sg->kvm->mmu_lock);
|
||||
|
|
@ -1108,7 +1168,12 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s
|
|||
if (kvm_s390_array_needs_retry_safe(sg->kvm, context->seq, context->f))
|
||||
return -EAGAIN;
|
||||
|
||||
scoped_guard(spinlock, &sg->parent->children_lock) {
|
||||
parent = READ_ONCE(sg->parent);
|
||||
if (!parent)
|
||||
return -EAGAIN;
|
||||
scoped_guard(spinlock, &parent->children_lock) {
|
||||
if (READ_ONCE(sg->parent) != parent)
|
||||
return -EAGAIN;
|
||||
for (i = 0; i < CRST_TABLE_PAGES; i++) {
|
||||
if (!context->f[i].valid)
|
||||
continue;
|
||||
|
|
@ -1191,6 +1256,9 @@ struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *pare
|
|||
struct gmap *sg, *new;
|
||||
int rc;
|
||||
|
||||
if (WARN_ON(!parent))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
scoped_guard(spinlock, &parent->children_lock) {
|
||||
sg = gmap_find_shadow(parent, asce, edat_level);
|
||||
if (sg) {
|
||||
|
|
|
|||
|
|
@ -185,6 +185,8 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
|
|||
else
|
||||
_gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
}
|
||||
if (!ptep->s.d && newpte.s.d && !newpte.s.s)
|
||||
SetPageDirty(pfn_to_page(newpte.h.pfra));
|
||||
return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
|
||||
}
|
||||
|
||||
|
|
@ -194,35 +196,42 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
|
|||
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
|
||||
}
|
||||
|
||||
static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
|
||||
gfn_t gfn, bool needs_lock)
|
||||
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
|
||||
union crste oldcrste, union crste newcrste,
|
||||
gfn_t gfn, bool needs_lock)
|
||||
{
|
||||
unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11);
|
||||
unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES;
|
||||
|
||||
if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm))
|
||||
return true;
|
||||
|
||||
lockdep_assert_held(&gmap->kvm->mmu_lock);
|
||||
if (!needs_lock)
|
||||
lockdep_assert_held(&gmap->children_lock);
|
||||
|
||||
gfn = ALIGN_DOWN(gfn, align);
|
||||
if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) {
|
||||
ne.s.fc1.prefix_notif = 0;
|
||||
if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) {
|
||||
newcrste.s.fc1.prefix_notif = 0;
|
||||
gmap_unmap_prefix(gmap, gfn, gfn + align);
|
||||
}
|
||||
if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif &&
|
||||
(ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) {
|
||||
ne.s.fc1.vsie_notif = 0;
|
||||
if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
|
||||
(newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
|
||||
newcrste.s.fc1.vsie_notif = 0;
|
||||
if (needs_lock)
|
||||
gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
else
|
||||
_gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
}
|
||||
dat_crstep_xchg(crstep, ne, gfn, gmap->asce);
|
||||
if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s)
|
||||
SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
|
||||
return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
|
||||
}
|
||||
|
||||
static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
|
||||
gfn_t gfn)
|
||||
static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
|
||||
union crste oldcrste, union crste newcrste,
|
||||
gfn_t gfn)
|
||||
{
|
||||
return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true);
|
||||
return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -5520,9 +5520,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|||
}
|
||||
#endif
|
||||
case KVM_S390_VCPU_FAULT: {
|
||||
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
r = vcpu_dat_fault_handler(vcpu, arg, 0);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
gpa_t gaddr = arg;
|
||||
|
||||
scoped_guard(srcu, &vcpu->kvm->srcu) {
|
||||
r = vcpu_ucontrol_translate(vcpu, &gaddr);
|
||||
if (r)
|
||||
break;
|
||||
|
||||
r = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(gaddr), false);
|
||||
if (r == PGM_ADDRESSING)
|
||||
r = -EFAULT;
|
||||
if (r <= 0)
|
||||
break;
|
||||
r = -EIO;
|
||||
KVM_BUG_ON(r, vcpu->kvm);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KVM_ENABLE_CAP:
|
||||
|
|
|
|||
|
|
@ -1328,7 +1328,7 @@ static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
|
|||
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
struct gmap *sg;
|
||||
struct gmap *sg = NULL;
|
||||
int rc = 0;
|
||||
|
||||
while (1) {
|
||||
|
|
@ -1368,6 +1368,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
sg = gmap_put(sg);
|
||||
cond_resched();
|
||||
}
|
||||
if (sg)
|
||||
sg = gmap_put(sg);
|
||||
|
||||
if (rc == -EFAULT) {
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -3044,12 +3044,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
|||
bool prefetch = !fault || fault->prefetch;
|
||||
bool write_fault = fault && fault->write;
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn))) {
|
||||
vcpu->stat.pf_mmio_spte_created++;
|
||||
mark_mmio_spte(vcpu, sptep, gfn, pte_access);
|
||||
return RET_PF_EMULATE;
|
||||
}
|
||||
|
||||
if (is_shadow_present_pte(*sptep)) {
|
||||
if (prefetch && is_last_spte(*sptep, level) &&
|
||||
pfn == spte_to_pfn(*sptep))
|
||||
|
|
@ -3066,13 +3060,22 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
|
|||
child = spte_to_child_sp(pte);
|
||||
drop_parent_pte(vcpu->kvm, child, sptep);
|
||||
flush = true;
|
||||
} else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) {
|
||||
} else if (pfn != spte_to_pfn(*sptep)) {
|
||||
WARN_ON_ONCE(vcpu->arch.mmu->root_role.direct);
|
||||
drop_spte(vcpu->kvm, sptep);
|
||||
flush = true;
|
||||
} else
|
||||
was_rmapped = 1;
|
||||
}
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn))) {
|
||||
vcpu->stat.pf_mmio_spte_created++;
|
||||
mark_mmio_spte(vcpu, sptep, gfn, pte_access);
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
|
||||
return RET_PF_EMULATE;
|
||||
}
|
||||
|
||||
wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch,
|
||||
false, host_writable, &spte);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue