diff --git a/arch/Kconfig b/arch/Kconfig index 31220f512b16..102ddbd4298e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1056,6 +1056,9 @@ config HAVE_IRQ_TIME_ACCOUNTING Archs need to ensure they use a high enough resolution clock to support irq time accounting and then call enable_sched_clock_irqtime(). +config HAVE_PV_STEAL_CLOCK_GEN + bool + config HAVE_MOVE_PUD bool help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fa83c040ee2d..fc9b5b7016c3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1320,6 +1320,7 @@ config UACCESS_WITH_MEMCPY config PARAVIRT bool "Enable paravirtualization code" + select HAVE_PV_STEAL_CLOCK_GEN help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h deleted file mode 100644 index 95d5b0d625cd..000000000000 --- a/arch/arm/include/asm/paravirt.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM_PARAVIRT_H -#define _ASM_ARM_PARAVIRT_H - -#ifdef CONFIG_PARAVIRT -#include - -struct static_key; -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - -u64 dummy_steal_clock(int cpu); - -DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); - -static inline u64 paravirt_steal_clock(int cpu) -{ - return static_call(pv_steal_clock)(cpu); -} -#endif - -#endif diff --git a/arch/arm/include/asm/paravirt_api_clock.h b/arch/arm/include/asm/paravirt_api_clock.h deleted file mode 100644 index 65ac7cee0dad..000000000000 --- a/arch/arm/include/asm/paravirt_api_clock.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index afc9de7ef9a1..b36cf0cfd4a7 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -83,7 +83,6 @@ AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_VDSO) += vdso.o obj-$(CONFIG_EFI) += efi.o -obj-$(CONFIG_PARAVIRT) += paravirt.o obj-y += head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c deleted file mode 100644 index 7dd9806369fb..000000000000 --- a/arch/arm/kernel/paravirt.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * - * Copyright (C) 2013 Citrix Systems - * - * Author: Stefano Stabellini - */ - -#include -#include -#include -#include -#include - -struct static_key paravirt_steal_enabled; -struct static_key paravirt_steal_rq_enabled; - -static u64 native_steal_clock(int cpu) -{ - return 0; -} - -DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1d5e86068bd6..100e75dc656e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1580,6 +1580,7 @@ config CC_HAVE_SHADOW_CALL_STACK config PARAVIRT bool "Enable paravirtualization code" + select HAVE_PV_STEAL_CLOCK_GEN help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 9aa193e0e8f2..cb037e742372 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -3,20 +3,6 @@ #define _ASM_ARM64_PARAVIRT_H #ifdef CONFIG_PARAVIRT -#include - -struct static_key; -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - -u64 dummy_steal_clock(int cpu); - -DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); - -static inline u64 paravirt_steal_clock(int cpu) -{ - return static_call(pv_steal_clock)(cpu); -} int __init pv_time_init(void); diff --git a/arch/arm64/include/asm/paravirt_api_clock.h b/arch/arm64/include/asm/paravirt_api_clock.h deleted file mode 100644 index 65ac7cee0dad..000000000000 --- a/arch/arm64/include/asm/paravirt_api_clock.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index aa718d6a9274..572efb96b23f 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -19,21 +19,12 @@ #include #include #include +#include #include #include #include -struct static_key paravirt_steal_enabled; -struct static_key paravirt_steal_rq_enabled; - -static u64 native_steal_clock(int cpu) -{ - return 0; -} - -DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); - struct pv_time_stolen_time_region { struct pvclock_vcpu_stolen_time __rcu *kaddr; }; diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 730f34214519..19f08082a782 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -687,6 +687,7 @@ source "kernel/livepatch/Kconfig" config PARAVIRT bool "Enable paravirtualization code" depends on AS_HAS_LVZ_EXTENSION + select HAVE_PV_STEAL_CLOCK_GEN help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h index 3f4323603e6a..0111f0ad5f73 100644 --- a/arch/loongarch/include/asm/paravirt.h +++ b/arch/loongarch/include/asm/paravirt.h @@ -4,19 +4,6 @@ #ifdef CONFIG_PARAVIRT -#include -struct static_key; -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - -u64 dummy_steal_clock(int cpu); -DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); - -static inline u64 paravirt_steal_clock(int cpu) -{ - return static_call(pv_steal_clock)(cpu); -} - int __init pv_ipi_init(void); int __init pv_time_init(void); int __init pv_spinlock_init(void); diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h deleted file mode 100644 index 65ac7cee0dad..000000000000 --- a/arch/loongarch/include/asm/paravirt_api_clock.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index b1b51f920b23..c5e526098c0b 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -6,21 +6,13 @@ #include #include #include +#include #include static int has_steal_clock; -struct static_key paravirt_steal_enabled; -struct static_key paravirt_steal_rq_enabled; static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); -static u64 native_steal_clock(int cpu) -{ - return 0; -} - -DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); - static bool steal_acc = true; static int __init parse_no_stealacc(char *arg) diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index b78b82d66057..92343a23ad15 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -23,9 +23,6 @@ static inline bool is_shared_processor(void) } #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - u64 pseries_paravirt_steal_clock(int cpu); static inline u64 paravirt_steal_clock(int cpu) diff --git a/arch/powerpc/include/asm/paravirt_api_clock.h b/arch/powerpc/include/asm/paravirt_api_clock.h deleted file mode 100644 index d25ca7ac57c7..000000000000 --- a/arch/powerpc/include/asm/paravirt_api_clock.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b10a25325238..50b26ed8432d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -83,9 +84,6 @@ DEFINE_STATIC_KEY_FALSE(shared_processor); EXPORT_SYMBOL(shared_processor); #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -struct static_key paravirt_steal_enabled; -struct static_key paravirt_steal_rq_enabled; - static bool steal_acc = true; static int __init parse_no_stealacc(char *arg) { diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b39f37f769a..80a4cd557198 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1111,6 +1111,7 @@ config COMPAT config PARAVIRT bool "Enable paravirtualization code" depends on RISCV_SBI + select HAVE_PV_STEAL_CLOCK_GEN help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly diff --git a/arch/riscv/include/asm/paravirt.h b/arch/riscv/include/asm/paravirt.h index c0abde70fc2c..c49c55b266f3 100644 --- a/arch/riscv/include/asm/paravirt.h +++ b/arch/riscv/include/asm/paravirt.h @@ -3,20 +3,6 @@ #define _ASM_RISCV_PARAVIRT_H #ifdef CONFIG_PARAVIRT -#include - -struct static_key; -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - -u64 dummy_steal_clock(int cpu); - -DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); - -static inline u64 paravirt_steal_clock(int cpu) -{ - return static_call(pv_steal_clock)(cpu); -} int __init pv_time_init(void); diff --git a/arch/riscv/include/asm/paravirt_api_clock.h b/arch/riscv/include/asm/paravirt_api_clock.h deleted file mode 100644 index 65ac7cee0dad..000000000000 --- a/arch/riscv/include/asm/paravirt_api_clock.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index fa6b0339a65d..5f56be79cd06 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -16,22 +16,13 @@ #include #include #include +#include #include #include #include #include -struct static_key paravirt_steal_enabled; -struct static_key paravirt_steal_rq_enabled; - -static u64 native_steal_clock(int cpu) -{ - return 0; -} - -DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); - static bool steal_acc = true; static int __init parse_no_stealacc(char *arg) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bf076e3609f4..66446220afe8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -813,6 +813,7 @@ if HYPERVISOR_GUEST config PARAVIRT bool "Enable paravirtualization code" depends on HAVE_STATIC_CALL + select HAVE_PV_STEAL_CLOCK_GEN help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly @@ -823,13 +824,6 @@ config PARAVIRT_XXL bool depends on X86_64 -config PARAVIRT_DEBUG - bool "paravirt-ops debugging" - depends on PARAVIRT && DEBUG_KERNEL - help - Enable to debug paravirt_ops internals. Specifically, BUG if - a paravirt_op is missing when it is called. - config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" depends on PARAVIRT && SMP diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f9983a1907bf..42447b1e1dff 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 6e6c0a740837..4bd1e271bb22 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -37,7 +37,6 @@ #include #include #include -#include #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c index 81b006601370..210b494e4de0 100644 --- a/arch/x86/hyperv/hv_spinlock.c +++ b/arch/x86/hyperv/hv_spinlock.c @@ -13,7 +13,6 @@ #include #include -#include #include #include @@ -79,11 +78,11 @@ void __init hv_init_spinlocks(void) pr_info("PV spinlocks enabled\n"); __pv_init_lock_hash(); - pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; - pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); - pv_ops.lock.wait = hv_qlock_wait; - pv_ops.lock.kick = hv_qlock_kick; - pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted); + pv_ops_lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops_lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_ops_lock.wait = hv_qlock_wait; + pv_ops_lock.kick = hv_qlock_kick; + pv_ops_lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted); } static __init int hv_parse_nopvspin(char *arg) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a26e66d66444..9cd493d467d4 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -90,10 +90,6 @@ static inline bool apic_from_smp_config(void) /* * Basic functions accessing APICs. */ -#ifdef CONFIG_PARAVIRT -#include -#endif - static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 585bdadba47d..decfaaf52326 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index eef4c3a5ba28..f64393e853ee 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/include/asm/paravirt-base.h b/arch/x86/include/asm/paravirt-base.h new file mode 100644 index 000000000000..982a0b93bc76 --- /dev/null +++ b/arch/x86/include/asm/paravirt-base.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_X86_PARAVIRT_BASE_H +#define _ASM_X86_PARAVIRT_BASE_H + +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + +struct pv_info { +#ifdef CONFIG_PARAVIRT_XXL + u16 extra_user_64bit_cs; /* __USER_CS if none */ +#endif + const char *name; +}; + +void default_banner(void); +extern struct pv_info pv_info; +unsigned long paravirt_ret0(void); +#ifdef CONFIG_PARAVIRT_XXL +u64 _paravirt_ident_64(u64); +#endif +#define paravirt_nop ((void *)nop_func) + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void paravirt_set_cap(void); +#else +static inline void paravirt_set_cap(void) { } +#endif + +#endif /* _ASM_X86_PARAVIRT_BASE_H */ diff --git a/arch/x86/include/asm/paravirt-spinlock.h b/arch/x86/include/asm/paravirt-spinlock.h new file mode 100644 index 000000000000..7beffcb08ed6 --- /dev/null +++ b/arch/x86/include/asm/paravirt-spinlock.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_PARAVIRT_SPINLOCK_H +#define _ASM_X86_PARAVIRT_SPINLOCK_H + +#include + +#ifdef CONFIG_SMP +#include +#endif + +struct qspinlock; + +struct pv_lock_ops { + void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); + struct paravirt_callee_save queued_spin_unlock; + + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); + + struct paravirt_callee_save vcpu_is_preempted; +} __no_randomize_layout; + +extern struct pv_lock_ops pv_ops_lock; + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); +extern bool nopvspin; + +static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, + u32 val) +{ + PVOP_VCALL2(pv_ops_lock, queued_spin_lock_slowpath, lock, val); +} + +static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + PVOP_ALT_VCALLEE1(pv_ops_lock, queued_spin_unlock, lock, + "movb $0, (%%" _ASM_ARG1 ")", + ALT_NOT(X86_FEATURE_PVUNLOCK)); +} + +static __always_inline bool pv_vcpu_is_preempted(long cpu) +{ + return PVOP_ALT_CALLEE1(bool, pv_ops_lock, vcpu_is_preempted, cpu, + "xor %%eax, %%eax", + ALT_NOT(X86_FEATURE_VCPUPREEMPT)); +} + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. + */ +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release(&lock->locked, 0); +} + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + kcsan_release(); + pv_queued_spin_unlock(lock); +} + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(long cpu) +{ + return pv_vcpu_is_preempted(cpu); +} + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + PVOP_VCALL2(pv_ops_lock, wait, ptr, val); +} + +static __always_inline void pv_kick(int cpu) +{ + PVOP_VCALL1(pv_ops_lock, kick, cpu); +} + +void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); +bool __raw_callee_save___native_vcpu_is_preempted(long cpu); +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +void __init native_pv_lock_init(void); +__visible void __native_queued_spin_unlock(struct qspinlock *lock); +bool pv_is_native_spin_unlock(void); +__visible bool __native_vcpu_is_preempted(long cpu); +bool pv_is_native_vcpu_is_preempted(void); + +/* + * virt_spin_lock_key - disables by default the virt_spin_lock() hijack. + * + * Native (and PV wanting native due to vCPU pinning) should keep this key + * disabled. Native does not touch the key. + * + * When in a guest then native_pv_lock_init() enables the key first and + * KVM/XEN might conditionally disable it later in the boot process again. + */ +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); + +/* + * Shortcut for the queued_spin_lock_slowpath() function that allows + * virt to hijack it. + * + * Returns: + * true - lock has been negotiated, all done; + * false - queued_spin_lock_slowpath() will do its thing. + */ +#define virt_spin_lock virt_spin_lock +static inline bool virt_spin_lock(struct qspinlock *lock) +{ + int val; + + if (!static_branch_likely(&virt_spin_lock_key)) + return false; + + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + __retry: + val = atomic_read(&lock->val); + + if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { + cpu_relax(); + goto __retry; + } + + return true; +} + +#endif /* _ASM_X86_PARAVIRT_SPINLOCK_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index b5e59a7ba0d0..3d0b92a8a557 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,11 +4,10 @@ /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ -#include - #ifndef __ASSEMBLER__ -struct mm_struct; +#include #endif +#include #ifdef CONFIG_PARAVIRT #include @@ -16,51 +15,18 @@ struct mm_struct; #include #ifndef __ASSEMBLER__ -#include #include #include -#include #include -u64 dummy_steal_clock(int cpu); -u64 dummy_sched_clock(void); - -DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); -DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); - -void paravirt_set_sched_clock(u64 (*func)(void)); - -static __always_inline u64 paravirt_sched_clock(void) -{ - return static_call(pv_sched_clock)(); -} - -struct static_key; -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - -__visible void __native_queued_spin_unlock(struct qspinlock *lock); -bool pv_is_native_spin_unlock(void); -__visible bool __native_vcpu_is_preempted(long cpu); -bool pv_is_native_vcpu_is_preempted(void); - -static inline u64 paravirt_steal_clock(int cpu) -{ - return static_call(pv_steal_clock)(cpu); -} - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -void __init paravirt_set_cap(void); -#endif - /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { - PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(pv_ops, cpu.io_delay); #ifdef REALLY_SLOW_IO - PVOP_VCALL0(cpu.io_delay); - PVOP_VCALL0(cpu.io_delay); - PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(pv_ops, cpu.io_delay); + PVOP_VCALL0(pv_ops, cpu.io_delay); + PVOP_VCALL0(pv_ops, cpu.io_delay); #endif } @@ -72,57 +38,57 @@ void native_flush_tlb_multi(const struct cpumask *cpumask, static inline void __flush_tlb_local(void) { - PVOP_VCALL0(mmu.flush_tlb_user); + PVOP_VCALL0(pv_ops, mmu.flush_tlb_user); } static inline void __flush_tlb_global(void) { - PVOP_VCALL0(mmu.flush_tlb_kernel); + PVOP_VCALL0(pv_ops, mmu.flush_tlb_kernel); } static inline void __flush_tlb_one_user(unsigned long addr) { - PVOP_VCALL1(mmu.flush_tlb_one_user, addr); + PVOP_VCALL1(pv_ops, mmu.flush_tlb_one_user, addr); } static inline void __flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info) { - PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); + PVOP_VCALL2(pv_ops, mmu.flush_tlb_multi, cpumask, info); } static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { - PVOP_VCALL1(mmu.exit_mmap, mm); + PVOP_VCALL1(pv_ops, mmu.exit_mmap, mm); } static inline void notify_page_enc_status_changed(unsigned long pfn, int npages, bool enc) { - PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); + PVOP_VCALL3(pv_ops, mmu.notify_page_enc_status_changed, pfn, npages, enc); } static __always_inline void arch_safe_halt(void) { - PVOP_VCALL0(irq.safe_halt); + PVOP_VCALL0(pv_ops, irq.safe_halt); } static inline void halt(void) { - PVOP_VCALL0(irq.halt); + PVOP_VCALL0(pv_ops, irq.halt); } #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { - PVOP_VCALL1(cpu.load_sp0, sp0); + PVOP_VCALL1(pv_ops, cpu.load_sp0, sp0); } /* The paravirtualized CPUID instruction. */ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx); + PVOP_VCALL4(pv_ops, cpu.cpuid, eax, ebx, ecx, edx); } /* @@ -130,69 +96,69 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, */ static __always_inline unsigned long paravirt_get_debugreg(int reg) { - return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); + return PVOP_CALL1(unsigned long, pv_ops, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) static __always_inline void set_debugreg(unsigned long val, int reg) { - PVOP_VCALL2(cpu.set_debugreg, reg, val); + PVOP_VCALL2(pv_ops, cpu.set_debugreg, reg, val); } static inline unsigned long read_cr0(void) { - return PVOP_CALL0(unsigned long, cpu.read_cr0); + return PVOP_CALL0(unsigned long, pv_ops, cpu.read_cr0); } static inline void write_cr0(unsigned long x) { - PVOP_VCALL1(cpu.write_cr0, x); + PVOP_VCALL1(pv_ops, cpu.write_cr0, x); } static __always_inline unsigned long read_cr2(void) { - return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, - "mov %%cr2, %%rax;", ALT_NOT_XEN); + return PVOP_ALT_CALLEE0(unsigned long, pv_ops, mmu.read_cr2, + "mov %%cr2, %%rax", ALT_NOT_XEN); } static __always_inline void write_cr2(unsigned long x) { - PVOP_VCALL1(mmu.write_cr2, x); + PVOP_VCALL1(pv_ops, mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { - return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, - "mov %%cr3, %%rax;", ALT_NOT_XEN); + return PVOP_ALT_CALL0(unsigned long, pv_ops, mmu.read_cr3, + "mov %%cr3, %%rax", ALT_NOT_XEN); } static inline void write_cr3(unsigned long x) { - PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN); + PVOP_ALT_VCALL1(pv_ops, mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN); } static inline void __write_cr4(unsigned long x) { - PVOP_VCALL1(cpu.write_cr4, x); + PVOP_VCALL1(pv_ops, cpu.write_cr4, x); } static inline u64 paravirt_read_msr(u32 msr) { - return PVOP_CALL1(u64, cpu.read_msr, msr); + return PVOP_CALL1(u64, pv_ops, cpu.read_msr, msr); } static inline void paravirt_write_msr(u32 msr, u64 val) { - PVOP_VCALL2(cpu.write_msr, msr, val); + PVOP_VCALL2(pv_ops, cpu.write_msr, msr, val); } static inline int paravirt_read_msr_safe(u32 msr, u64 *val) { - return PVOP_CALL2(int, cpu.read_msr_safe, msr, val); + return PVOP_CALL2(int, pv_ops, cpu.read_msr_safe, msr, val); } static inline int paravirt_write_msr_safe(u32 msr, u64 val) { - return PVOP_CALL2(int, cpu.write_msr_safe, msr, val); + return PVOP_CALL2(int, pv_ops, cpu.write_msr_safe, msr, val); } #define rdmsr(msr, val1, val2) \ @@ -239,154 +205,154 @@ static __always_inline int rdmsrq_safe(u32 msr, u64 *p) static __always_inline u64 rdpmc(int counter) { - return PVOP_CALL1(u64, cpu.read_pmc, counter); + return PVOP_CALL1(u64, pv_ops, cpu.read_pmc, counter); } static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { - PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); + PVOP_VCALL2(pv_ops, cpu.alloc_ldt, ldt, entries); } static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { - PVOP_VCALL2(cpu.free_ldt, ldt, entries); + PVOP_VCALL2(pv_ops, cpu.free_ldt, ldt, entries); } static inline void load_TR_desc(void) { - PVOP_VCALL0(cpu.load_tr_desc); + PVOP_VCALL0(pv_ops, cpu.load_tr_desc); } static inline void load_gdt(const struct desc_ptr *dtr) { - PVOP_VCALL1(cpu.load_gdt, dtr); + PVOP_VCALL1(pv_ops, cpu.load_gdt, dtr); } static inline void load_idt(const struct desc_ptr *dtr) { - PVOP_VCALL1(cpu.load_idt, dtr); + PVOP_VCALL1(pv_ops, cpu.load_idt, dtr); } static inline void set_ldt(const void *addr, unsigned entries) { - PVOP_VCALL2(cpu.set_ldt, addr, entries); + PVOP_VCALL2(pv_ops, cpu.set_ldt, addr, entries); } static inline unsigned long paravirt_store_tr(void) { - return PVOP_CALL0(unsigned long, cpu.store_tr); + return PVOP_CALL0(unsigned long, pv_ops, cpu.store_tr); } #define store_tr(tr) ((tr) = paravirt_store_tr()) static inline void load_TLS(struct thread_struct *t, unsigned cpu) { - PVOP_VCALL2(cpu.load_tls, t, cpu); + PVOP_VCALL2(pv_ops, cpu.load_tls, t, cpu); } static inline void load_gs_index(unsigned int gs) { - PVOP_VCALL1(cpu.load_gs_index, gs); + PVOP_VCALL1(pv_ops, cpu.load_gs_index, gs); } static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { - PVOP_VCALL3(cpu.write_ldt_entry, dt, entry, desc); + PVOP_VCALL3(pv_ops, cpu.write_ldt_entry, dt, entry, desc); } static inline void write_gdt_entry(struct desc_struct *dt, int entry, void *desc, int type) { - PVOP_VCALL4(cpu.write_gdt_entry, dt, entry, desc, type); + PVOP_VCALL4(pv_ops, cpu.write_gdt_entry, dt, entry, desc, type); } static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { - PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); + PVOP_VCALL3(pv_ops, cpu.write_idt_entry, dt, entry, g); } #ifdef CONFIG_X86_IOPL_IOPERM static inline void tss_invalidate_io_bitmap(void) { - PVOP_VCALL0(cpu.invalidate_io_bitmap); + PVOP_VCALL0(pv_ops, cpu.invalidate_io_bitmap); } static inline void tss_update_io_bitmap(void) { - PVOP_VCALL0(cpu.update_io_bitmap); + PVOP_VCALL0(pv_ops, cpu.update_io_bitmap); } #endif static inline void paravirt_enter_mmap(struct mm_struct *next) { - PVOP_VCALL1(mmu.enter_mmap, next); + PVOP_VCALL1(pv_ops, mmu.enter_mmap, next); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) { - return PVOP_CALL1(int, mmu.pgd_alloc, mm); + return PVOP_CALL1(int, pv_ops, mmu.pgd_alloc, mm); } static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) { - PVOP_VCALL2(mmu.pgd_free, mm, pgd); + PVOP_VCALL2(pv_ops, mmu.pgd_free, mm, pgd); } static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(mmu.alloc_pte, mm, pfn); + PVOP_VCALL2(pv_ops, mmu.alloc_pte, mm, pfn); } static inline void paravirt_release_pte(unsigned long pfn) { - PVOP_VCALL1(mmu.release_pte, pfn); + PVOP_VCALL1(pv_ops, mmu.release_pte, pfn); } static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(mmu.alloc_pmd, mm, pfn); + PVOP_VCALL2(pv_ops, mmu.alloc_pmd, mm, pfn); } static inline void paravirt_release_pmd(unsigned long pfn) { - PVOP_VCALL1(mmu.release_pmd, pfn); + PVOP_VCALL1(pv_ops, mmu.release_pmd, pfn); } static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(mmu.alloc_pud, mm, pfn); + PVOP_VCALL2(pv_ops, mmu.alloc_pud, mm, pfn); } static inline void paravirt_release_pud(unsigned long pfn) { - PVOP_VCALL1(mmu.release_pud, pfn); + PVOP_VCALL1(pv_ops, mmu.release_pud, pfn); } static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(mmu.alloc_p4d, mm, pfn); + PVOP_VCALL2(pv_ops, mmu.alloc_p4d, mm, pfn); } static inline void paravirt_release_p4d(unsigned long pfn) { - PVOP_VCALL1(mmu.release_p4d, pfn); + PVOP_VCALL1(pv_ops, mmu.release_p4d, pfn); } static inline pte_t __pte(pteval_t val) { - return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, + return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, pv_ops, mmu.make_pte, val, "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pteval_t pte_val(pte_t pte) { - return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, + return PVOP_ALT_CALLEE1(pteval_t, pv_ops, mmu.pte_val, pte.pte, "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline pgd_t __pgd(pgdval_t val) { - return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, + return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, pv_ops, mmu.make_pgd, val, "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pgdval_t pgd_val(pgd_t pgd) { - return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, + return PVOP_ALT_CALLEE1(pgdval_t, pv_ops, mmu.pgd_val, pgd.pgd, "mov %%rdi, %%rax", ALT_NOT_XEN); } @@ -396,7 +362,7 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned { pteval_t ret; - ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep); + ret = PVOP_CALL3(pteval_t, pv_ops, mmu.ptep_modify_prot_start, vma, addr, ptep); return (pte_t) { .pte = ret }; } @@ -405,41 +371,41 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned pte_t *ptep, pte_t old_pte, pte_t pte) { - PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte); + PVOP_VCALL4(pv_ops, mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte); } static inline void set_pte(pte_t *ptep, pte_t pte) { - PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); + PVOP_VCALL2(pv_ops, mmu.set_pte, ptep, pte.pte); } static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)); + PVOP_VCALL2(pv_ops, mmu.set_pmd, pmdp, native_pmd_val(pmd)); } static inline pmd_t __pmd(pmdval_t val) { - return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, + return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, pv_ops, mmu.make_pmd, val, "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pmdval_t pmd_val(pmd_t pmd) { - return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, + return PVOP_ALT_CALLEE1(pmdval_t, pv_ops, mmu.pmd_val, pmd.pmd, "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void set_pud(pud_t *pudp, pud_t pud) { - PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud)); + PVOP_VCALL2(pv_ops, mmu.set_pud, pudp, native_pud_val(pud)); } static inline pud_t __pud(pudval_t val) { pudval_t ret; - ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, + ret = PVOP_ALT_CALLEE1(pudval_t, pv_ops, mmu.make_pud, val, "mov %%rdi, %%rax", ALT_NOT_XEN); return (pud_t) { ret }; @@ -447,7 +413,7 @@ static inline pud_t __pud(pudval_t val) static inline pudval_t pud_val(pud_t pud) { - return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, + return PVOP_ALT_CALLEE1(pudval_t, pv_ops, mmu.pud_val, pud.pud, "mov %%rdi, %%rax", ALT_NOT_XEN); } @@ -460,12 +426,12 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { p4dval_t val = native_p4d_val(p4d); - PVOP_VCALL2(mmu.set_p4d, p4dp, val); + PVOP_VCALL2(pv_ops, mmu.set_p4d, p4dp, val); } static inline p4d_t __p4d(p4dval_t val) { - p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val, + p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, pv_ops, mmu.make_p4d, val, "mov %%rdi, %%rax", ALT_NOT_XEN); return (p4d_t) { ret }; @@ -473,13 +439,13 @@ static inline p4d_t __p4d(p4dval_t val) static inline p4dval_t p4d_val(p4d_t p4d) { - return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d, + return PVOP_ALT_CALLEE1(p4dval_t, pv_ops, mmu.p4d_val, p4d.p4d, "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { - PVOP_VCALL2(mmu.set_pgd, pgdp, native_pgd_val(pgd)); + PVOP_VCALL2(pv_ops, mmu.set_pgd, pgdp, native_pgd_val(pgd)); } #define set_pgd(pgdp, pgdval) do { \ @@ -518,28 +484,28 @@ static inline void pmd_clear(pmd_t *pmdp) #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL1(cpu.start_context_switch, prev); + PVOP_VCALL1(pv_ops, cpu.start_context_switch, prev); } static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL1(cpu.end_context_switch, next); + PVOP_VCALL1(pv_ops, cpu.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { - PVOP_VCALL0(mmu.lazy_mode.enter); + PVOP_VCALL0(pv_ops, mmu.lazy_mode.enter); } static inline void arch_leave_lazy_mmu_mode(void) { - PVOP_VCALL0(mmu.lazy_mode.leave); + PVOP_VCALL0(pv_ops, mmu.lazy_mode.leave); } static inline void arch_flush_lazy_mmu_mode(void) { - PVOP_VCALL0(mmu.lazy_mode.flush); + PVOP_VCALL0(pv_ops, mmu.lazy_mode.flush); } static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, @@ -547,128 +513,21 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, { pv_ops.mmu.set_fixmap(idx, phys, flags); } -#endif -#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) - -static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, - u32 val) -{ - PVOP_VCALL2(lock.queued_spin_lock_slowpath, lock, val); -} - -static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) -{ - PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock, - "movb $0, (%%" _ASM_ARG1 ");", - ALT_NOT(X86_FEATURE_PVUNLOCK)); -} - -static __always_inline void pv_wait(u8 *ptr, u8 val) -{ - PVOP_VCALL2(lock.wait, ptr, val); -} - -static __always_inline void pv_kick(int cpu) -{ - PVOP_VCALL1(lock.kick, cpu); -} - -static __always_inline bool pv_vcpu_is_preempted(long cpu) -{ - return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu, - "xor %%" _ASM_AX ", %%" _ASM_AX ";", - ALT_NOT(X86_FEATURE_VCPUPREEMPT)); -} - -void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); -bool __raw_callee_save___native_vcpu_is_preempted(long cpu); - -#endif /* SMP && PARAVIRT_SPINLOCKS */ - -#ifdef CONFIG_X86_32 -/* save and restore all caller-save registers, except return value */ -#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" -#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" -#else -/* save and restore all caller-save registers, except return value */ -#define PV_SAVE_ALL_CALLER_REGS \ - "push %rcx;" \ - "push %rdx;" \ - "push %rsi;" \ - "push %rdi;" \ - "push %r8;" \ - "push %r9;" \ - "push %r10;" \ - "push %r11;" -#define PV_RESTORE_ALL_CALLER_REGS \ - "pop %r11;" \ - "pop %r10;" \ - "pop %r9;" \ - "pop %r8;" \ - "pop %rdi;" \ - "pop %rsi;" \ - "pop %rdx;" \ - "pop %rcx;" -#endif - -/* - * Generate a thunk around a function which saves all caller-save - * registers except for the return value. This allows C functions to - * be called from assembler code where fewer than normal registers are - * available. It may also help code generation around calls from C - * code if the common case doesn't use many registers. - * - * When a callee is wrapped in a thunk, the caller can assume that all - * arg regs and all scratch registers are preserved across the - * call. The return value in rax/eax will not be saved, even for void - * functions. - */ -#define PV_THUNK_NAME(func) "__raw_callee_save_" #func -#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \ - extern typeof(func) __raw_callee_save_##func; \ - \ - asm(".pushsection " section ", \"ax\";" \ - ".globl " PV_THUNK_NAME(func) ";" \ - ".type " PV_THUNK_NAME(func) ", @function;" \ - ASM_FUNC_ALIGN \ - PV_THUNK_NAME(func) ":" \ - ASM_ENDBR \ - FRAME_BEGIN \ - PV_SAVE_ALL_CALLER_REGS \ - "call " #func ";" \ - PV_RESTORE_ALL_CALLER_REGS \ - FRAME_END \ - ASM_RET \ - ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ - ".popsection") - -#define PV_CALLEE_SAVE_REGS_THUNK(func) \ - __PV_CALLEE_SAVE_REGS_THUNK(func, ".text") - -/* Get a reference to a callee-save function */ -#define PV_CALLEE_SAVE(func) \ - ((struct paravirt_callee_save) { __raw_callee_save_##func }) - -/* Promise that "func" already uses the right calling convention */ -#define __PV_IS_CALLEE_SAVE(func) \ - ((struct paravirt_callee_save) { func }) - -#ifdef CONFIG_PARAVIRT_XXL static __always_inline unsigned long arch_local_save_flags(void) { - return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", + return PVOP_ALT_CALLEE0(unsigned long, pv_ops, irq.save_fl, "pushf; pop %%rax", ALT_NOT_XEN); } static __always_inline void arch_local_irq_disable(void) { - PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN); + PVOP_ALT_VCALLEE0(pv_ops, irq.irq_disable, "cli", ALT_NOT_XEN); } static __always_inline void arch_local_irq_enable(void) { - PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN); + PVOP_ALT_VCALLEE0(pv_ops, irq.irq_enable, "sti", ALT_NOT_XEN); } static __always_inline unsigned long arch_local_irq_save(void) @@ -681,25 +540,6 @@ static __always_inline unsigned long arch_local_irq_save(void) } #endif - -/* Make sure as little as possible of this mess escapes. */ -#undef PARAVIRT_CALL -#undef __PVOP_CALL -#undef __PVOP_VCALL -#undef PVOP_VCALL0 -#undef PVOP_CALL0 -#undef PVOP_VCALL1 -#undef PVOP_CALL1 -#undef PVOP_VCALL2 -#undef PVOP_CALL2 -#undef PVOP_VCALL3 -#undef PVOP_CALL3 -#undef PVOP_VCALL4 -#undef PVOP_CALL4 - -extern void default_banner(void); -void native_pv_lock_init(void) __init; - #else /* __ASSEMBLER__ */ #ifdef CONFIG_X86_64 @@ -713,9 +553,9 @@ void native_pv_lock_init(void) __init; call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); .endm -#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \ - "ALT_CALL_INSTR;", ALT_CALL_ALWAYS, \ - "pushf; pop %rax;", ALT_NOT_XEN +#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl", \ + "ALT_CALL_INSTR", ALT_CALL_ALWAYS, \ + "pushf; pop %rax", ALT_NOT_XEN #endif #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ @@ -723,12 +563,6 @@ void native_pv_lock_init(void) __init; #endif /* __ASSEMBLER__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop - -#ifndef __ASSEMBLER__ -static inline void native_pv_lock_init(void) -{ -} -#endif #endif /* !CONFIG_PARAVIRT */ #ifndef __ASSEMBLER__ @@ -744,10 +578,5 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) } #endif -#ifndef CONFIG_PARAVIRT_SPINLOCKS -static inline void paravirt_set_cap(void) -{ -} -#endif #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h deleted file mode 100644 index 65ac7cee0dad..000000000000 --- a/arch/x86/include/asm/paravirt_api_clock.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 3502939415ad..9bcf6bce88f6 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -7,39 +7,18 @@ #ifndef __ASSEMBLER__ #include +#include #include #include #include -struct page; struct thread_struct; -struct desc_ptr; -struct tss_struct; struct mm_struct; -struct desc_struct; struct task_struct; struct cpumask; struct flush_tlb_info; -struct mmu_gather; struct vm_area_struct; -/* - * Wrapper type for pointers to code which uses the non-standard - * calling convention. See PV_CALL_SAVE_REGS_THUNK below. - */ -struct paravirt_callee_save { - void *func; -}; - -/* general info */ -struct pv_info { -#ifdef CONFIG_PARAVIRT_XXL - u16 extra_user_64bit_cs; /* __USER_CS if none */ -#endif - - const char *name; -}; - #ifdef CONFIG_PARAVIRT_XXL struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ @@ -205,23 +184,6 @@ struct pv_mmu_ops { #endif } __no_randomize_layout; -struct arch_spinlock; -#ifdef CONFIG_SMP -#include -#endif - -struct qspinlock; - -struct pv_lock_ops { - void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); - struct paravirt_callee_save queued_spin_unlock; - - void (*wait)(u8 *ptr, u8 val); - void (*kick)(int cpu); - - struct paravirt_callee_save vcpu_is_preempted; -} __no_randomize_layout; - /* This contains all the paravirt structures: we get a convenient * number for each function using the offset which we use to indicate * what to patch. */ @@ -229,13 +191,11 @@ struct paravirt_patch_template { struct pv_cpu_ops cpu; struct pv_irq_ops irq; struct pv_mmu_ops mmu; - struct pv_lock_ops lock; } __no_randomize_layout; -extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op) +#define paravirt_ptr(array, op) [paravirt_opptr] "m" (array.op) /* * This generates an indirect call based on the operation type number. @@ -250,7 +210,7 @@ extern struct paravirt_patch_template pv_ops; */ #define PARAVIRT_CALL \ ANNOTATE_RETPOLINE_SAFE "\n\t" \ - "call *%[paravirt_opptr];" + "call *%[paravirt_opptr]" /* * These macros are intended to wrap calls through one of the paravirt @@ -360,12 +320,6 @@ extern struct paravirt_patch_template pv_ops; #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_PARAVIRT_DEBUG -#define PVOP_TEST_NULL(op) BUG_ON(pv_ops.op == NULL) -#else -#define PVOP_TEST_NULL(op) ((void)pv_ops.op) -#endif - #define PVOP_RETVAL(rettype) \ ({ unsigned long __mask = ~0UL; \ BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \ @@ -391,140 +345,195 @@ extern struct paravirt_patch_template pv_ops; * feature is not active, the direct call is used as above via the * ALT_FLAG_DIRECT_CALL special case and the "always on" feature. */ -#define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...) \ +#define ____PVOP_CALL(ret, array, op, call_clbr, extra_clbr, ...) \ ({ \ PVOP_CALL_ARGS; \ - PVOP_TEST_NULL(op); \ asm volatile(ALTERNATIVE(PARAVIRT_CALL, ALT_CALL_INSTR, \ ALT_CALL_ALWAYS) \ : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_ptr(op), \ + : paravirt_ptr(array, op), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ ret; \ }) -#define ____PVOP_ALT_CALL(ret, op, alt, cond, call_clbr, \ +#define ____PVOP_ALT_CALL(ret, array, op, alt, cond, call_clbr, \ extra_clbr, ...) \ ({ \ PVOP_CALL_ARGS; \ - PVOP_TEST_NULL(op); \ asm volatile(ALTERNATIVE_2(PARAVIRT_CALL, \ ALT_CALL_INSTR, ALT_CALL_ALWAYS, \ alt, cond) \ : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_ptr(op), \ + : paravirt_ptr(array, op), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ ret; \ }) -#define __PVOP_CALL(rettype, op, ...) \ - ____PVOP_CALL(PVOP_RETVAL(rettype), op, \ +#define __PVOP_CALL(rettype, array, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), array, op, \ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__) -#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \ - ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, \ +#define __PVOP_ALT_CALL(rettype, array, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), array, op, alt, cond, \ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \ ##__VA_ARGS__) -#define __PVOP_CALLEESAVE(rettype, op, ...) \ - ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, \ +#define __PVOP_CALLEESAVE(rettype, array, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), array, op.func, \ PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) -#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \ - ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \ +#define __PVOP_ALT_CALLEESAVE(rettype, array, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), array, op.func, alt, cond, \ PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) -#define __PVOP_VCALL(op, ...) \ - (void)____PVOP_CALL(, op, PVOP_VCALL_CLOBBERS, \ +#define __PVOP_VCALL(array, op, ...) \ + (void)____PVOP_CALL(, array, op, PVOP_VCALL_CLOBBERS, \ VEXTRA_CLOBBERS, ##__VA_ARGS__) -#define __PVOP_ALT_VCALL(op, alt, cond, ...) \ - (void)____PVOP_ALT_CALL(, op, alt, cond, \ +#define __PVOP_ALT_VCALL(array, op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, array, op, alt, cond, \ PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \ ##__VA_ARGS__) -#define __PVOP_VCALLEESAVE(op, ...) \ - (void)____PVOP_CALL(, op.func, \ +#define __PVOP_VCALLEESAVE(array, op, ...) \ + (void)____PVOP_CALL(, array, op.func, \ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) -#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \ - (void)____PVOP_ALT_CALL(, op.func, alt, cond, \ +#define __PVOP_ALT_VCALLEESAVE(array, op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, array, op.func, alt, cond, \ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) -#define PVOP_CALL0(rettype, op) \ - __PVOP_CALL(rettype, op) -#define PVOP_VCALL0(op) \ - __PVOP_VCALL(op) -#define PVOP_ALT_CALL0(rettype, op, alt, cond) \ - __PVOP_ALT_CALL(rettype, op, alt, cond) -#define PVOP_ALT_VCALL0(op, alt, cond) \ - __PVOP_ALT_VCALL(op, alt, cond) +#define PVOP_CALL0(rettype, array, op) \ + __PVOP_CALL(rettype, array, op) +#define PVOP_VCALL0(array, op) \ + __PVOP_VCALL(array, op) +#define PVOP_ALT_CALL0(rettype, array, op, alt, cond) \ + __PVOP_ALT_CALL(rettype, array, op, alt, cond) +#define PVOP_ALT_VCALL0(array, op, alt, cond) \ + __PVOP_ALT_VCALL(array, op, alt, cond) -#define PVOP_CALLEE0(rettype, op) \ - __PVOP_CALLEESAVE(rettype, op) -#define PVOP_VCALLEE0(op) \ - __PVOP_VCALLEESAVE(op) -#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \ - __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond) -#define PVOP_ALT_VCALLEE0(op, alt, cond) \ - __PVOP_ALT_VCALLEESAVE(op, alt, cond) +#define PVOP_CALLEE0(rettype, array, op) \ + __PVOP_CALLEESAVE(rettype, array, op) +#define PVOP_VCALLEE0(array, op) \ + __PVOP_VCALLEESAVE(array, op) +#define PVOP_ALT_CALLEE0(rettype, array, op, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, array, op, alt, cond) +#define PVOP_ALT_VCALLEE0(array, op, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(array, op, alt, cond) -#define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1)) -#define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1)) -#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \ - __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1)) +#define PVOP_CALL1(rettype, array, op, arg1) \ + __PVOP_CALL(rettype, array, op, PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALL1(array, op, arg1) \ + __PVOP_VCALL(array, op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALL1(array, op, arg1, alt, cond) \ + __PVOP_ALT_VCALL(array, op, alt, cond, PVOP_CALL_ARG1(arg1)) -#define PVOP_CALLEE1(rettype, op, arg1) \ - __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1)) -#define PVOP_VCALLEE1(op, arg1) \ - __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1)) -#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \ - __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1)) -#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \ - __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1)) +#define PVOP_CALLEE1(rettype, array, op, arg1) \ + __PVOP_CALLEESAVE(rettype, array, op, PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALLEE1(array, op, arg1) \ + __PVOP_VCALLEESAVE(array, op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_CALLEE1(rettype, array, op, arg1, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, array, op, alt, cond, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALLEE1(array, op, arg1, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(array, op, alt, cond, PVOP_CALL_ARG1(arg1)) -#define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) +#define PVOP_CALL2(rettype, array, op, arg1, arg2) \ + __PVOP_CALL(rettype, array, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALL2(array, op, arg1, arg2) \ + __PVOP_VCALL(array, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) -#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \ +#define PVOP_CALL3(rettype, array, op, arg1, arg2, arg3) \ + __PVOP_CALL(rettype, array, op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) -#define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \ +#define PVOP_VCALL3(array, op, arg1, arg2, arg3) \ + __PVOP_VCALL(array, op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, \ +#define PVOP_CALL4(rettype, array, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, array, op, \ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ +#define PVOP_VCALL4(array, op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(array, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -unsigned long paravirt_ret0(void); -#ifdef CONFIG_PARAVIRT_XXL -u64 _paravirt_ident_64(u64); -unsigned long pv_native_save_fl(void); -void pv_native_irq_disable(void); -void pv_native_irq_enable(void); -unsigned long pv_native_read_cr2(void); -#endif - -#define paravirt_nop ((void *)nop_func) - #endif /* __ASSEMBLER__ */ #define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV) +#ifdef CONFIG_X86_32 +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" +#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" +#else +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rcx;" \ + "push %rdx;" \ + "push %rsi;" \ + "push %rdi;" \ + "push %r8;" \ + "push %r9;" \ + "push %r10;" \ + "push %r11;" +#define PV_RESTORE_ALL_CALLER_REGS \ + "pop %r11;" \ + "pop %r10;" \ + "pop %r9;" \ + "pop %r8;" \ + "pop %rdi;" \ + "pop %rsi;" \ + "pop %rdx;" \ + "pop %rcx;" +#endif + +/* + * Generate a thunk around a function which saves all caller-save + * registers except for the return value. This allows C functions to + * be called from assembler code where fewer than normal registers are + * available. It may also help code generation around calls from C + * code if the common case doesn't use many registers. + * + * When a callee is wrapped in a thunk, the caller can assume that all + * arg regs and all scratch registers are preserved across the + * call. The return value in rax/eax will not be saved, even for void + * functions. + */ +#define PV_THUNK_NAME(func) "__raw_callee_save_" #func +#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \ + extern typeof(func) __raw_callee_save_##func; \ + \ + asm(".pushsection " section ", \"ax\";" \ + ".globl " PV_THUNK_NAME(func) ";" \ + ".type " PV_THUNK_NAME(func) ", @function;" \ + ASM_FUNC_ALIGN \ + PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ + FRAME_BEGIN \ + PV_SAVE_ALL_CALLER_REGS \ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + FRAME_END \ + ASM_RET \ + ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ + ".popsection") + +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + __PV_CALLEE_SAVE_REGS_THUNK(func, ".text") + +/* Get a reference to a callee-save function */ +#define PV_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { __raw_callee_save_##func }) + +/* Promise that "func" already uses the right calling convention */ +#define __PV_IS_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { func }) + #endif /* CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index b612cc57a4d3..acea0cfa2460 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -16,7 +16,6 @@ #ifndef __ASSEMBLER__ #include #include -#include #include #include diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 35d062a2e304..7bb7bd90355d 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -172,7 +172,7 @@ struct pt_regs { #endif /* !__i386__ */ #ifdef CONFIG_PARAVIRT -#include +#include #endif #include diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 68da67df304d..25a1919542d9 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -7,6 +7,9 @@ #include #include #include +#ifdef CONFIG_PARAVIRT +#include +#endif #define _Q_PENDING_LOOPS (1 << 9) @@ -27,90 +30,10 @@ static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lo return val; } -#ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_init_lock_hash(void); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); -extern bool nopvspin; - -#define queued_spin_unlock queued_spin_unlock -/** - * queued_spin_unlock - release a queued spinlock - * @lock : Pointer to queued spinlock structure - * - * A smp_store_release() on the least-significant byte. - */ -static inline void native_queued_spin_unlock(struct qspinlock *lock) -{ - smp_store_release(&lock->locked, 0); -} - -static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) -{ - pv_queued_spin_lock_slowpath(lock, val); -} - -static inline void queued_spin_unlock(struct qspinlock *lock) -{ - kcsan_release(); - pv_queued_spin_unlock(lock); -} - -#define vcpu_is_preempted vcpu_is_preempted -static inline bool vcpu_is_preempted(long cpu) -{ - return pv_vcpu_is_preempted(cpu); -} +#ifndef CONFIG_PARAVIRT +static inline void native_pv_lock_init(void) { } #endif -#ifdef CONFIG_PARAVIRT -/* - * virt_spin_lock_key - disables by default the virt_spin_lock() hijack. - * - * Native (and PV wanting native due to vCPU pinning) should keep this key - * disabled. Native does not touch the key. - * - * When in a guest then native_pv_lock_init() enables the key first and - * KVM/XEN might conditionally disable it later in the boot process again. - */ -DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); - -/* - * Shortcut for the queued_spin_lock_slowpath() function that allows - * virt to hijack it. - * - * Returns: - * true - lock has been negotiated, all done; - * false - queued_spin_lock_slowpath() will do its thing. - */ -#define virt_spin_lock virt_spin_lock -static inline bool virt_spin_lock(struct qspinlock *lock) -{ - int val; - - if (!static_branch_likely(&virt_spin_lock_key)) - return false; - - /* - * On hypervisors without PARAVIRT_SPINLOCKS support we fall - * back to a Test-and-Set spinlock, because fair locks have - * horrible lock 'holder' preemption issues. - */ - - __retry: - val = atomic_read(&lock->val); - - if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { - cpu_relax(); - goto __retry; - } - - return true; -} - -#endif /* CONFIG_PARAVIRT */ - #include #endif /* _ASM_X86_QSPINLOCK_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 5b6bc7016c22..934632b78d09 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -7,7 +7,6 @@ #include #include #include -#include #include /* diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 23baf8c9b34c..fda18bcb19b4 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,6 +12,7 @@ extern void recalibrate_cpu_khz(void); extern int no_timer_check; extern bool using_native_sched_clock(void); +void paravirt_set_sched_clock(u64 (*func)(void)); /* * We use the full linear equation: f(x) = a + b*x, in order to allow diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 5114bf50c911..5a3cdc439e38 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -303,10 +303,6 @@ static inline void mm_clear_asid_transition(struct mm_struct *mm) { } static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; } #endif /* CONFIG_BROADCAST_TLB_FLUSH */ -#ifdef CONFIG_PARAVIRT -#include -#endif - #define flush_tlb_mm(mm) \ flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bc184dd38d99..e9aeeeafad17 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -126,7 +126,7 @@ obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o -obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o +obj-$(CONFIG_PARAVIRT) += paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index b37ab1095707..3175d7c134e9 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -229,7 +229,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index a951333c5995..e37728f70322 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d0a2847a4bb0..83f51cab0b1e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index cb3f900c46fc..a3e6936839b1 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 37dc8465e0f5..26ab6f8e36df 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -841,8 +842,10 @@ static void __init kvm_guest_init(void) has_steal_clock = 1; static_call_update(pv_steal_clock, kvm_steal_clock); - pv_ops.lock.vcpu_is_preempted = +#ifdef CONFIG_PARAVIRT_SPINLOCKS + pv_ops_lock.vcpu_is_preempted = PV_CALLEE_SAVE(__kvm_vcpu_is_preempted); +#endif } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -1138,11 +1141,11 @@ void __init kvm_spinlock_init(void) pr_info("PV spinlocks enabled\n"); __pv_init_lock_hash(); - pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; - pv_ops.lock.queued_spin_unlock = + pv_ops_lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops_lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); - pv_ops.lock.wait = kvm_wait; - pv_ops.lock.kick = kvm_kick_cpu; + pv_ops_lock.wait = kvm_wait; + pv_ops_lock.kick = kvm_kick_cpu; /* * When PV spinlock is enabled which is preferred over diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index ca0a49eeac4a..b5991d53fc0e 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -19,6 +19,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 9e1ea99ad9df..95452444868f 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -3,12 +3,22 @@ * Split spinlock implementation out into its own file, so it can be * compiled in a FTRACE-compatible way. */ +#include #include #include #include -#include +DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); +#ifdef CONFIG_SMP +void __init native_pv_lock_init(void) +{ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_enable(&virt_spin_lock_key); +} +#endif + +#ifdef CONFIG_PARAVIRT_SPINLOCKS __visible void __native_queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); @@ -17,7 +27,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); bool pv_is_native_spin_unlock(void) { - return pv_ops.lock.queued_spin_unlock.func == + return pv_ops_lock.queued_spin_unlock.func == __raw_callee_save___native_queued_spin_unlock; } @@ -29,7 +39,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted); bool pv_is_native_vcpu_is_preempted(void) { - return pv_ops.lock.vcpu_is_preempted.func == + return pv_ops_lock.vcpu_is_preempted.func == __raw_callee_save___native_vcpu_is_preempted; } @@ -41,3 +51,13 @@ void __init paravirt_set_cap(void) if (!pv_is_native_vcpu_is_preempted()) setup_force_cpu_cap(X86_FEATURE_VCPUPREEMPT); } + +struct pv_lock_ops pv_ops_lock = { + .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, + .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), + .wait = paravirt_nop, + .kick = paravirt_nop, + .vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted), +}; +EXPORT_SYMBOL(pv_ops_lock); +#endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ab3e172dcc69..a6ed52cae003 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -45,6 +45,11 @@ void __init default_banner(void) } #ifdef CONFIG_PARAVIRT_XXL +unsigned long pv_native_save_fl(void); +void pv_native_irq_disable(void); +void pv_native_irq_enable(void); +unsigned long pv_native_read_cr2(void); + DEFINE_ASM_FUNC(_paravirt_ident_64, "mov %rdi, %rax", .text); DEFINE_ASM_FUNC(pv_native_save_fl, "pushf; pop %rax", .noinstr.text); DEFINE_ASM_FUNC(pv_native_irq_disable, "cli", .noinstr.text); @@ -52,30 +57,6 @@ DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text); DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif -DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); - -void __init native_pv_lock_init(void) -{ - if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) - static_branch_enable(&virt_spin_lock_key); -} - -struct static_key paravirt_steal_enabled; -struct static_key paravirt_steal_rq_enabled; - -static u64 native_steal_clock(int cpu) -{ - return 0; -} - -DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); -DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock); - -void paravirt_set_sched_clock(u64 (*func)(void)) -{ - static_call_update(pv_sched_clock, func); -} - static noinstr void pv_native_safe_halt(void) { native_safe_halt(); @@ -232,19 +213,6 @@ struct paravirt_patch_template pv_ops = { .mmu.set_fixmap = native_set_fixmap, #endif /* CONFIG_PARAVIRT_XXL */ - -#if defined(CONFIG_PARAVIRT_SPINLOCKS) - /* Lock ops. */ -#ifdef CONFIG_SMP - .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, - .lock.queued_spin_unlock = - PV_CALLEE_SAVE(__native_queued_spin_unlock), - .lock.wait = paravirt_nop, - .lock.kick = paravirt_nop, - .lock.vcpu_is_preempted = - PV_CALLEE_SAVE(__native_vcpu_is_preempted), -#endif /* SMP */ -#endif }; #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7be44b5198cf..d9aa694e43f3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -267,19 +267,27 @@ u64 native_sched_clock_from_tsc(u64 tsc) /* We need to define a real function for sched_clock, to override the weak default version */ #ifdef CONFIG_PARAVIRT +DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock); + noinstr u64 sched_clock_noinstr(void) { - return paravirt_sched_clock(); + return static_call(pv_sched_clock)(); } bool using_native_sched_clock(void) { return static_call_query(pv_sched_clock) == native_sched_clock; } + +void paravirt_set_sched_clock(u64 (*func)(void)) +{ + static_call_update(pv_sched_clock, func); +} #else u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock"))); bool using_native_sched_clock(void) { return true; } +void paravirt_set_sched_clock(u64 (*func)(void)) { } #endif notrace u64 sched_clock(void) diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 73511332bb67..25625e3fc183 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #define TOPOLOGY_REGISTER_OFFSET 0x10 diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c index 824664c0ecbd..7d3edd6deb6b 100644 --- a/arch/x86/lib/cache-smp.c +++ b/arch/x86/lib/cache-smp.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include #include #include diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8bf6ad4b9400..76537d40493c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -27,7 +27,6 @@ #include #include #include -#include #include /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index b74ff8bc7f2a..8a19a88190ee 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1212,54 +1212,6 @@ static const struct pv_info xen_info __initconst = { .name = "Xen", }; -static const typeof(pv_ops) xen_cpu_ops __initconst = { - .cpu = { - .cpuid = xen_cpuid, - - .set_debugreg = xen_set_debugreg, - .get_debugreg = xen_get_debugreg, - - .read_cr0 = xen_read_cr0, - .write_cr0 = xen_write_cr0, - - .write_cr4 = xen_write_cr4, - - .read_msr = xen_read_msr, - .write_msr = xen_write_msr, - - .read_msr_safe = xen_read_msr_safe, - .write_msr_safe = xen_write_msr_safe, - - .read_pmc = xen_read_pmc, - - .load_tr_desc = paravirt_nop, - .set_ldt = xen_set_ldt, - .load_gdt = xen_load_gdt, - .load_idt = xen_load_idt, - .load_tls = xen_load_tls, - .load_gs_index = xen_load_gs_index, - - .alloc_ldt = xen_alloc_ldt, - .free_ldt = xen_free_ldt, - - .store_tr = xen_store_tr, - - .write_ldt_entry = xen_write_ldt_entry, - .write_gdt_entry = xen_write_gdt_entry, - .write_idt_entry = xen_write_idt_entry, - .load_sp0 = xen_load_sp0, - -#ifdef CONFIG_X86_IOPL_IOPERM - .invalidate_io_bitmap = xen_invalidate_io_bitmap, - .update_io_bitmap = xen_update_io_bitmap, -#endif - .io_delay = xen_io_delay, - - .start_context_switch = xen_start_context_switch, - .end_context_switch = xen_end_context_switch, - }, -}; - static void xen_restart(char *msg) { xen_reboot(SHUTDOWN_reboot); @@ -1411,7 +1363,39 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) /* Install Xen paravirt ops */ pv_info = xen_info; - pv_ops.cpu = xen_cpu_ops.cpu; + + pv_ops.cpu.cpuid = xen_cpuid; + pv_ops.cpu.set_debugreg = xen_set_debugreg; + pv_ops.cpu.get_debugreg = xen_get_debugreg; + pv_ops.cpu.read_cr0 = xen_read_cr0; + pv_ops.cpu.write_cr0 = xen_write_cr0; + pv_ops.cpu.write_cr4 = xen_write_cr4; + pv_ops.cpu.read_msr = xen_read_msr; + pv_ops.cpu.write_msr = xen_write_msr; + pv_ops.cpu.read_msr_safe = xen_read_msr_safe; + pv_ops.cpu.write_msr_safe = xen_write_msr_safe; + pv_ops.cpu.read_pmc = xen_read_pmc; + pv_ops.cpu.load_tr_desc = paravirt_nop; + pv_ops.cpu.set_ldt = xen_set_ldt; + pv_ops.cpu.load_gdt = xen_load_gdt; + pv_ops.cpu.load_idt = xen_load_idt; + pv_ops.cpu.load_tls = xen_load_tls; + pv_ops.cpu.load_gs_index = xen_load_gs_index; + pv_ops.cpu.alloc_ldt = xen_alloc_ldt; + pv_ops.cpu.free_ldt = xen_free_ldt; + pv_ops.cpu.store_tr = xen_store_tr; + pv_ops.cpu.write_ldt_entry = xen_write_ldt_entry; + pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; + pv_ops.cpu.write_idt_entry = xen_write_idt_entry; + pv_ops.cpu.load_sp0 = xen_load_sp0; +#ifdef CONFIG_X86_IOPL_IOPERM + pv_ops.cpu.invalidate_io_bitmap = xen_invalidate_io_bitmap; + pv_ops.cpu.update_io_bitmap = xen_update_io_bitmap; +#endif + pv_ops.cpu.io_delay = xen_io_delay; + pv_ops.cpu.start_context_switch = xen_start_context_switch; + pv_ops.cpu.end_context_switch = xen_end_context_switch; + xen_init_irq_ops(); /* diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 39982f955cfe..d8678c3d3971 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -40,20 +40,14 @@ static void xen_halt(void) xen_safe_halt(); } -static const typeof(pv_ops) xen_irq_ops __initconst = { - .irq = { - /* Initial interrupt flag handling only called while interrupts off. */ - .save_fl = __PV_IS_CALLEE_SAVE(paravirt_ret0), - .irq_disable = __PV_IS_CALLEE_SAVE(paravirt_nop), - .irq_enable = __PV_IS_CALLEE_SAVE(BUG_func), - - .safe_halt = xen_safe_halt, - .halt = xen_halt, - }, -}; - void __init xen_init_irq_ops(void) { - pv_ops.irq = xen_irq_ops.irq; + /* Initial interrupt flag handling only called while interrupts off. */ + pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(paravirt_ret0); + pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(paravirt_nop); + pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(BUG_func); + pv_ops.irq.safe_halt = xen_safe_halt; + pv_ops.irq.halt = xen_halt; + x86_init.irqs.intr_init = xen_init_IRQ; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2a4a8deaf612..9fa00c4a8858 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2175,73 +2175,49 @@ static void xen_leave_lazy_mmu(void) preempt_enable(); } -static const typeof(pv_ops) xen_mmu_ops __initconst = { - .mmu = { - .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2), - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3_init, - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_one_user = xen_flush_tlb_one_user, - .flush_tlb_multi = xen_flush_tlb_multi, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, - - .alloc_pte = xen_alloc_pte_init, - .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pmd_init, - .release_pmd = xen_release_pmd_init, - - .set_pte = xen_set_pte_init, - .set_pmd = xen_set_pmd_hyper, - - .ptep_modify_prot_start = xen_ptep_modify_prot_start, - .ptep_modify_prot_commit = xen_ptep_modify_prot_commit, - - .pte_val = PV_CALLEE_SAVE(xen_pte_val), - .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - - .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), - .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), - - .set_pud = xen_set_pud_hyper, - - .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), - .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), - - .pud_val = PV_CALLEE_SAVE(xen_pud_val), - .make_pud = PV_CALLEE_SAVE(xen_make_pud), - .set_p4d = xen_set_p4d_hyper, - - .alloc_pud = xen_alloc_pmd_init, - .release_pud = xen_release_pmd_init, - - .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), - .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), - - .enter_mmap = xen_enter_mmap, - .exit_mmap = xen_exit_mmap, - - .lazy_mode = { - .enter = xen_enter_lazy_mmu, - .leave = xen_leave_lazy_mmu, - .flush = xen_flush_lazy_mmu, - }, - - .set_fixmap = xen_set_fixmap, - }, -}; - void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; x86_init.hyper.init_after_bootmem = xen_after_bootmem; - pv_ops.mmu = xen_mmu_ops.mmu; + pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2); + pv_ops.mmu.write_cr2 = xen_write_cr2; + pv_ops.mmu.read_cr3 = xen_read_cr3; + pv_ops.mmu.write_cr3 = xen_write_cr3_init; + pv_ops.mmu.flush_tlb_user = xen_flush_tlb; + pv_ops.mmu.flush_tlb_kernel = xen_flush_tlb; + pv_ops.mmu.flush_tlb_one_user = xen_flush_tlb_one_user; + pv_ops.mmu.flush_tlb_multi = xen_flush_tlb_multi; + pv_ops.mmu.pgd_alloc = xen_pgd_alloc; + pv_ops.mmu.pgd_free = xen_pgd_free; + pv_ops.mmu.alloc_pte = xen_alloc_pte_init; + pv_ops.mmu.release_pte = xen_release_pte_init; + pv_ops.mmu.alloc_pmd = xen_alloc_pmd_init; + pv_ops.mmu.release_pmd = xen_release_pmd_init; + pv_ops.mmu.set_pte = xen_set_pte_init; + pv_ops.mmu.set_pmd = xen_set_pmd_hyper; + pv_ops.mmu.ptep_modify_prot_start = xen_ptep_modify_prot_start; + pv_ops.mmu.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; + pv_ops.mmu.pte_val = PV_CALLEE_SAVE(xen_pte_val); + pv_ops.mmu.pgd_val = PV_CALLEE_SAVE(xen_pgd_val); + pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte_init); + pv_ops.mmu.make_pgd = PV_CALLEE_SAVE(xen_make_pgd); + pv_ops.mmu.set_pud = xen_set_pud_hyper; + pv_ops.mmu.make_pmd = PV_CALLEE_SAVE(xen_make_pmd); + pv_ops.mmu.pmd_val = PV_CALLEE_SAVE(xen_pmd_val); + pv_ops.mmu.pud_val = PV_CALLEE_SAVE(xen_pud_val); + pv_ops.mmu.make_pud = PV_CALLEE_SAVE(xen_make_pud); + pv_ops.mmu.set_p4d = xen_set_p4d_hyper; + pv_ops.mmu.alloc_pud = xen_alloc_pmd_init; + pv_ops.mmu.release_pud = xen_release_pmd_init; + pv_ops.mmu.p4d_val = PV_CALLEE_SAVE(xen_p4d_val); + pv_ops.mmu.make_p4d = PV_CALLEE_SAVE(xen_make_p4d); + pv_ops.mmu.enter_mmap = xen_enter_mmap; + pv_ops.mmu.exit_mmap = xen_exit_mmap; + pv_ops.mmu.lazy_mode.enter = xen_enter_lazy_mmu; + pv_ops.mmu.lazy_mode.leave = xen_leave_lazy_mmu; + pv_ops.mmu.lazy_mode.flush = xen_flush_lazy_mmu; + pv_ops.mmu.set_fixmap = xen_set_fixmap; memset(dummy_mapping, 0xff, PAGE_SIZE); } diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 8e4efe0fb6f9..83ac24ead289 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -135,10 +134,10 @@ void __init xen_init_spinlocks(void) printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); __pv_init_lock_hash(); - pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; - pv_ops.lock.queued_spin_unlock = + pv_ops_lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops_lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); - pv_ops.lock.wait = xen_qlock_wait; - pv_ops.lock.kick = xen_qlock_kick; - pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); + pv_ops_lock.wait = xen_qlock_wait; + pv_ops_lock.kick = xen_qlock_kick; + pv_ops_lock.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 96521b1874ac..6f9f665bb7ae 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -16,8 +16,10 @@ #include #include #include +#include #include +#include #include #include #include diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 10356d4ec55c..e9f5034a1bc8 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -535,6 +535,8 @@ static __always_inline void hv_setup_sched_clock(void *sched_clock) sched_clock_register(sched_clock, 64, NSEC_PER_SEC); } #elif defined CONFIG_PARAVIRT +#include + static __always_inline void hv_setup_sched_clock(void *sched_clock) { /* We're on x86/x64 *and* using PV ops */ diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 5683383d2305..0b18d8a5a2dd 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -8,8 +8,8 @@ #include #include #include +#include -#include #include #include diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 5f8fd5b24a2e..e90efaf6d26e 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -2,6 +2,7 @@ #ifndef _LINUX_SCHED_CPUTIME_H #define _LINUX_SCHED_CPUTIME_H +#include #include /* @@ -180,4 +181,21 @@ static inline void prev_cputime_init(struct prev_cputime *prev) extern unsigned long long task_sched_runtime(struct task_struct *task); +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +#ifdef CONFIG_HAVE_PV_STEAL_CLOCK_GEN +u64 dummy_steal_clock(int cpu); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} +#endif +#endif + #endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 23406f037dde..7c8b769c0d0d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -778,6 +778,11 @@ struct rq *_task_rq_lock(struct task_struct *p, struct rq_flags *rf) * RQ-clock updating methods: */ +/* Use CONFIG_PARAVIRT as this will avoid more #ifdef in arch code. */ +#ifdef CONFIG_PARAVIRT +struct static_key paravirt_steal_rq_enabled; +#endif + static void update_rq_clock_task(struct rq *rq, s64 delta) { /* diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ff0dfca95420..fbf31db0d2f3 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -252,6 +252,19 @@ void __account_forceidle_time(struct task_struct *p, u64 delta) * ticks are not redelivered later. Due to that, this function may on * occasion account more time than the calling functions think elapsed. */ +#ifdef CONFIG_PARAVIRT +struct static_key paravirt_steal_enabled; + +#ifdef CONFIG_HAVE_PV_STEAL_CLOCK_GEN +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); +#endif +#endif + static __always_inline u64 steal_account_process_time(u64 maxtime) { #ifdef CONFIG_PARAVIRT diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 62f9278b1663..e51bfa3586fa 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -86,9 +86,8 @@ struct rt_rq; struct sched_group; struct cpuidle_state; -#ifdef CONFIG_PARAVIRT +#if defined(CONFIG_PARAVIRT) && !defined(CONFIG_HAVE_PV_STEAL_CLOCK_GEN) # include -# include #endif #include diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index f4af82508228..73bfea220d1b 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -711,10 +711,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec immr = find_reloc_by_dest(elf, (void *)sec, offset+3); disp = find_reloc_by_dest(elf, (void *)sec, offset+7); - if (!immr || strcmp(immr->sym->name, "pv_ops")) + if (!immr || strncmp(immr->sym->name, "pv_ops", 6)) break; - idx = (reloc_addend(immr) + 8) / sizeof(void *); + idx = pv_ops_idx_off(immr->sym->name); + if (idx < 0) + break; + + idx += (reloc_addend(immr) + 8) / sizeof(void *); func = disp->sym; if (disp->sym->type == STT_SECTION) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3fd98c5b6e1a..37f87c4a0134 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -521,21 +521,58 @@ static int decode_instructions(struct objtool_file *file) } /* - * Read the pv_ops[] .data table to find the static initialized values. + * Known pv_ops*[] arrays. */ -static int add_pv_ops(struct objtool_file *file, const char *symname) +static struct { + const char *name; + int idx_off; +} pv_ops_tables[] = { + { .name = "pv_ops", }, + { .name = "pv_ops_lock", }, + { .name = NULL, .idx_off = -1 } +}; + +/* + * Get index offset for a pv_ops* array. + */ +int pv_ops_idx_off(const char *symname) +{ + int idx; + + for (idx = 0; pv_ops_tables[idx].name; idx++) { + if (!strcmp(symname, pv_ops_tables[idx].name)) + break; + } + + return pv_ops_tables[idx].idx_off; +} + +/* + * Read a pv_ops*[] .data table to find the static initialized values. + */ +static int add_pv_ops(struct objtool_file *file, int pv_ops_idx) { struct symbol *sym, *func; unsigned long off, end; struct reloc *reloc; - int idx; + int idx, idx_off; + const char *symname; + symname = pv_ops_tables[pv_ops_idx].name; sym = find_symbol_by_name(file->elf, symname); - if (!sym) - return 0; + if (!sym) { + ERROR("Unknown pv_ops array %s", symname); + return -1; + } off = sym->offset; end = off + sym->len; + idx_off = pv_ops_tables[pv_ops_idx].idx_off; + if (idx_off < 0) { + ERROR("pv_ops array %s has unknown index offset", symname); + return -1; + } + for (;;) { reloc = find_reloc_by_dest_range(file->elf, sym->sec, off, end - off); if (!reloc) @@ -553,7 +590,7 @@ static int add_pv_ops(struct objtool_file *file, const char *symname) return -1; } - if (objtool_pv_add(file, idx, func)) + if (objtool_pv_add(file, idx + idx_off, func)) return -1; off = reloc_offset(reloc) + 1; @@ -569,14 +606,6 @@ static int add_pv_ops(struct objtool_file *file, const char *symname) */ static int init_pv_ops(struct objtool_file *file) { - static const char *pv_ops_tables[] = { - "pv_ops", - "xen_cpu_ops", - "xen_irq_ops", - "xen_mmu_ops", - NULL, - }; - const char *pv_ops; struct symbol *sym; int idx, nr; @@ -585,11 +614,20 @@ static int init_pv_ops(struct objtool_file *file) file->pv_ops = NULL; - sym = find_symbol_by_name(file->elf, "pv_ops"); - if (!sym) + nr = 0; + for (idx = 0; pv_ops_tables[idx].name; idx++) { + sym = find_symbol_by_name(file->elf, pv_ops_tables[idx].name); + if (!sym) { + pv_ops_tables[idx].idx_off = -1; + continue; + } + pv_ops_tables[idx].idx_off = nr; + nr += sym->len / sizeof(unsigned long); + } + + if (nr == 0) return 0; - nr = sym->len / sizeof(unsigned long); file->pv_ops = calloc(nr, sizeof(struct pv_state)); if (!file->pv_ops) { ERROR_GLIBC("calloc"); @@ -599,8 +637,10 @@ static int init_pv_ops(struct objtool_file *file) for (idx = 0; idx < nr; idx++) INIT_LIST_HEAD(&file->pv_ops[idx].targets); - for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) { - if (add_pv_ops(file, pv_ops)) + for (idx = 0; pv_ops_tables[idx].name; idx++) { + if (pv_ops_tables[idx].idx_off < 0) + continue; + if (add_pv_ops(file, idx)) return -1; } diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 2e1346ad5e92..5f2f77bd9b41 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -159,5 +159,6 @@ const char *objtool_disas_insn(struct instruction *insn); extern size_t sym_name_max_len; extern struct disas_context *objtool_disas_ctx; +int pv_ops_idx_off(const char *symname); #endif /* _CHECK_H */