/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include <asm/percpu.h>
#include <asm/segment.h>
#include "kvm-asm-offsets.h"
#include "run_flags.h"

/* Size in bytes of one native machine word, i.e. of one saved register slot. */
#define WORD_SIZE	(BITS_PER_LONG / 8)

/*
 * Byte offsets of the guest general purpose registers inside the guest
 * register array: each one is the register's __VCPU_REGS_* index scaled by
 * WORD_SIZE.  They are used below as displacements off the array pointer.
 */
#define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
#define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
#define VCPU_RDX	__VCPU_REGS_RDX * WORD_SIZE
#define VCPU_RBX	__VCPU_REGS_RBX * WORD_SIZE
/* There is no VCPU_RSP on purpose: RSP is context switched by hardware. */
#define VCPU_RBP	__VCPU_REGS_RBP * WORD_SIZE
#define VCPU_RSI	__VCPU_REGS_RSI * WORD_SIZE
#define VCPU_RDI	__VCPU_REGS_RDI * WORD_SIZE

/* R8-R15 only exist in 64-bit mode. */
#ifdef CONFIG_X86_64
#define VCPU_R8		__VCPU_REGS_R8  * WORD_SIZE
#define VCPU_R9		__VCPU_REGS_R9  * WORD_SIZE
#define VCPU_R10	__VCPU_REGS_R10 * WORD_SIZE
#define VCPU_R11	__VCPU_REGS_R11 * WORD_SIZE
#define VCPU_R12	__VCPU_REGS_R12 * WORD_SIZE
#define VCPU_R13	__VCPU_REGS_R13 * WORD_SIZE
#define VCPU_R14	__VCPU_REGS_R14 * WORD_SIZE
#define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
#endif
|
|
|
|
|
|
|
|
/*
 * VMX_DO_EVENT_IRQOFF - call an IRQ/NMI handler on a synthetic interrupt frame
 * @call_insn:   instruction (or macro) that performs the call
 * @call_target: operand handed to @call_insn
 *
 * Expands to a complete function body, including the final RET.
 */
.macro VMX_DO_EVENT_IRQOFF call_insn call_target
	/*
	 * Always set up a frame pointer.  On x86-64, putting the right RSP
	 * value on the stack would cost two instructions regardless, and
	 * having RBP around lets the epilogue restore RSP in a way objtool
	 * accepts (see the end of the macro).
	 */
	push	%_ASM_BP
	mov	%_ASM_SP, %_ASM_BP

#ifdef CONFIG_X86_64
	/*
	 * Emulate CPU behavior by rounding RSP down to a 16-byte boundary
	 * before the synthetic interrupt stack frame for the IRQ/NMI is built.
	 * RBP still holds the unaligned stack pointer and is the value pushed
	 * after __KERNEL_DS.
	 */
	and	$-16, %rsp
	push	$__KERNEL_DS
	push	%rbp
#endif
	/* Flags and code segment; the call itself pushes the return address. */
	pushf
	push	$__KERNEL_CS
	\call_insn \call_target

	/*
	 * The callee's IRET has already put RSP back where it belongs, but
	 * objtool is unaware that the callee will IRET and would conclude that
	 * the stack is getting walloped.  So "restore" RSP from RBP explicitly.
	 * An unwind hint is problematic here because of the dynamic alignment
	 * done on x86-64.
	 */
	mov	%_ASM_BP, %_ASM_SP
	pop	%_ASM_BP
	RET
.endm
|
|
|
|
|
|
|
|
.section .noinstr.text, "ax"

/**
 * __vmx_vcpu_run - Transition to VMX guest mode to run a vCPU
 * @vmx:	struct vcpu_vmx *
 * @regs:	unsigned long * (to guest registers)
 * @flags:	VMX_RUN_VMRESUME:	use VMRESUME instead of VMLAUNCH
 *		VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
 *
 * Stack slots after the prologue, lowest address first:
 *	0*WORD_SIZE(SP)		@regs
 *	1*WORD_SIZE(SP)		@flags
 *	2*WORD_SIZE(SP)		@vmx
 *	above that		host RBX, R12-R15 (ESI/EDI on 32-bit), RBP
 *
 * Returns:
 *	0 on VM-Exit, 1 on VM-Fail
 */
SYM_FUNC_START(__vmx_vcpu_run)
	push	%_ASM_BP
	mov	%_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
	push	%r15
	push	%r14
	push	%r13
	push	%r12
#else
	push	%edi
	push	%esi
#endif
	push	%_ASM_BX

	/* Stash @vmx, the SPEC_CTRL handling needs it */
	push	%_ASM_ARG1

	/* Stash @flags, the SPEC_CTRL handling needs it as well */
	push	%_ASM_ARG3

	/*
	 * Stash @regs.  vmx_update_host_rsp() may modify _ASM_ARG2, and the
	 * pointer is required again after VM-Exit in order to store the
	 * guest's register values.
	 */
	push	%_ASM_ARG2

	/* _ASM_ARG3 is volatile, so keep a copy of @flags in EBX. */
	mov	%_ASM_ARG3L, %ebx

	/* Hand the current stack pointer to vmx_update_host_rsp(). */
	lea	(%_ASM_SP), %_ASM_ARG2
	call	vmx_update_host_rsp

	/* Bypass the SPEC_CTRL block unless X86_FEATURE_MSR_SPEC_CTRL is set. */
	ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL

	/*
	 * SPEC_CTRL handling: the MSR is written only when the guest's
	 * SPEC_CTRL value is not equal to the host's.
	 *
	 * IMPORTANT: between this code and vmentry there must not be any
	 * returns or indirect branches, to avoid RSB underflow attacks and
	 * any other nastiness.
	 */
	mov	2*WORD_SIZE(%_ASM_SP), %_ASM_DI			/* @vmx */
	movl	VMX_spec_ctrl(%_ASM_DI), %edi			/* guest value */
	movl	PER_CPU_VAR(x86_spec_ctrl_current), %esi	/* host value */
	cmp	%edi, %esi
	je	.Lspec_ctrl_done
	mov	$MSR_IA32_SPEC_CTRL, %ecx
	xor	%edx, %edx
	mov	%edi, %eax
	wrmsr

.Lspec_ctrl_done:

	/*
	 * No LFENCE is needed to keep speculation from skipping the wrmsr:
	 * vmentry is serializing on affected CPUs.
	 */

	/* RAX = @regs */
	mov	(%_ASM_SP), %_ASM_AX

	/* Decide between vmlaunch and vmresume; the result stays in ZF. */
	test	$VMX_RUN_VMRESUME, %ebx

	/* Load the guest's registers.  Nothing here may clobber flags. */
	mov	VCPU_RCX(%_ASM_AX), %_ASM_CX
	mov	VCPU_RDX(%_ASM_AX), %_ASM_DX
	mov	VCPU_RBX(%_ASM_AX), %_ASM_BX
	mov	VCPU_RBP(%_ASM_AX), %_ASM_BP
	mov	VCPU_RSI(%_ASM_AX), %_ASM_SI
	mov	VCPU_RDI(%_ASM_AX), %_ASM_DI
#ifdef CONFIG_X86_64
	mov	VCPU_R8 (%_ASM_AX), %r8
	mov	VCPU_R9 (%_ASM_AX), %r9
	mov	VCPU_R10(%_ASM_AX), %r10
	mov	VCPU_R11(%_ASM_AX), %r11
	mov	VCPU_R12(%_ASM_AX), %r12
	mov	VCPU_R13(%_ASM_AX), %r13
	mov	VCPU_R14(%_ASM_AX), %r14
	mov	VCPU_R15(%_ASM_AX), %r15
#endif
	/* Guest RAX goes last: loading it destroys the @regs pointer! */
	mov	VCPU_RAX(%_ASM_AX), %_ASM_AX

	/* EFLAGS.ZF is still the result of 'test VMX_RUN_VMRESUME' above */
	jz	.Lvmlaunch

	/*
	 * A successful VMRESUME/VMLAUNCH makes control flow "magically"
	 * continue at 'vmx_vmexit' below, because of the VMCS HOST_RIP
	 * setting.  This is therefore not a typical function: objtool has to
	 * be told to save the unwind state here and to restore it below.
	 */
	UNWIND_HINT_SAVE

/*
 * When VMRESUME/VMLAUNCH and the corresponding vmexit succeed, execution
 * continues at the 'vmx_vmexit' label below.  Falling through to the jmp
 * means the instruction failed.
 */
.Lvmresume:
	vmresume
	jmp	.Lvmfail

.Lvmlaunch:
	vmlaunch
	jmp	.Lvmfail

	/* Exception table entries: both entry instructions fix up to .Lfixup. */
	_ASM_EXTABLE(.Lvmresume, .Lfixup)
	_ASM_EXTABLE(.Lvmlaunch, .Lfixup)

SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)

	/* Bring back the unwind state saved ahead of the VMRESUME/VMLAUNCH. */
	UNWIND_HINT_RESTORE
	ENDBR

	/* Park the guest's RAX on the stack for a moment. */
	push	%_ASM_AX

	/* RAX = @regs, which now sits one slot above the parked guest RAX. */
	mov	WORD_SIZE(%_ASM_SP), %_ASM_AX

	/* Store every guest register, taking RAX straight from the stack */
	pop	VCPU_RAX(%_ASM_AX)
	mov	%_ASM_CX, VCPU_RCX(%_ASM_AX)
	mov	%_ASM_DX, VCPU_RDX(%_ASM_AX)
	mov	%_ASM_BX, VCPU_RBX(%_ASM_AX)
	mov	%_ASM_BP, VCPU_RBP(%_ASM_AX)
	mov	%_ASM_SI, VCPU_RSI(%_ASM_AX)
	mov	%_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
	mov	%r8,  VCPU_R8 (%_ASM_AX)
	mov	%r9,  VCPU_R9 (%_ASM_AX)
	mov	%r10, VCPU_R10(%_ASM_AX)
	mov	%r11, VCPU_R11(%_ASM_AX)
	mov	%r12, VCPU_R12(%_ASM_AX)
	mov	%r13, VCPU_R13(%_ASM_AX)
	mov	%r14, VCPU_R14(%_ASM_AX)
	mov	%r15, VCPU_R15(%_ASM_AX)
#endif

	/* Return value 0 signals VM-Exit (as opposed to VM-Fail). */
	xor	%ebx, %ebx

.Lclear_regs:
	/* Drop @regs.  Any register will do as the target, except RBX. */
	pop	%_ASM_AX

	/*
	 * Zero every general purpose register other than RSP and RBX so that
	 * the guest's values cannot be used speculatively; this includes the
	 * registers that are about to be reloaded from the stack.  In theory,
	 * an L1 cache miss while restoring registers could result in
	 * speculative execution with the guest's values.  Zeroing XORs are
	 * dirt cheap, so the extra paranoia is essentially free.  RSP is
	 * exempt because hardware restores it during VM-Exit, RBX because it
	 * is explicitly loaded with 0 or 1 to carry the return value.
	 */
	xor	%eax, %eax
	xor	%ecx, %ecx
	xor	%edx, %edx
	xor	%ebp, %ebp
	xor	%esi, %esi
	xor	%edi, %edi
#ifdef CONFIG_X86_64
	xor	%r8d,  %r8d
	xor	%r9d,  %r9d
	xor	%r10d, %r10d
	xor	%r11d, %r11d
	xor	%r12d, %r12d
	xor	%r13d, %r13d
	xor	%r14d, %r14d
	xor	%r15d, %r15d
#endif

	/*
	 * IMPORTANT: both RSB filling and SPEC_CTRL handling have to happen
	 * before the first unbalanced RET after vmexit!
	 *
	 * With retpoline or IBRS, the RSB must be filled to guard against
	 * poisoned RSB entries and (in some cases) RSB underflow.
	 *
	 * eIBRS protects against a poisoned RSB by itself and so does not need
	 * the RSB filling sequence.  It does however need to be enabled, and a
	 * single call needs to retire, before the first unbalanced RET.
	 */

	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
			   X86_FEATURE_RSB_VMEXIT_LITE

	/* Pop the two stashed values into the argument registers. */
	pop	%_ASM_ARG2	/* @flags */
	pop	%_ASM_ARG1	/* @vmx */

	call	vmx_spec_ctrl_restore_host

	/* Move the return value from RBX into AX */
	mov	%_ASM_BX, %_ASM_AX

	/* Unwind the prologue's pushes in reverse order. */
	pop	%_ASM_BX
#ifdef CONFIG_X86_64
	pop	%r12
	pop	%r13
	pop	%r14
	pop	%r15
#else
	pop	%esi
	pop	%edi
#endif
	pop	%_ASM_BP
	RET

	/*
	 * Reached through the exception table entries above.  The fault is
	 * reported as VM-Fail if kvm_rebooting is non-zero, otherwise execution
	 * runs into ud2.
	 */
.Lfixup:
	cmpb	$0, kvm_rebooting
	jne	.Lvmfail
	ud2
.Lvmfail:
	/* VM-Fail: the return value is 1 */
	mov	$1, %_ASM_BX
	jmp	.Lclear_regs

SYM_FUNC_END(__vmx_vcpu_run)
|
|
|
|
|
|
|
|
/*
 * vmx_do_nmi_irqoff - run asm_exc_nmi_kvm_vmx on a synthetic interrupt frame
 *
 * The whole body, RET included, comes from VMX_DO_EVENT_IRQOFF; the handler
 * is reached with a plain direct call.
 */
SYM_FUNC_START(vmx_do_nmi_irqoff)
	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)
|
|
|
|
|
|
|
|
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

/**
 * vmread_error_trampoline - Bounce from inline asm to vmread_error()
 * @field:	VMCS field encoding that failed
 * @fault:	%true if the VMREAD faulted, %false if it failed
 *
 * Preserves the volatile registers around the call into the C error path.
 * Note that every parameter is passed on the stack.  Relative to the frame
 * pointer set up below:
 *	2*WORD_SIZE(BP)		@field
 *	3*WORD_SIZE(BP)		@fault (overwritten with zero before returning)
 */
SYM_FUNC_START(vmread_error_trampoline)
	push	%_ASM_BP
	mov	%_ASM_SP, %_ASM_BP

	/* Save the volatile registers; they are restored in reverse order. */
	push	%_ASM_AX
	push	%_ASM_CX
	push	%_ASM_DX
#ifdef CONFIG_X86_64
	push	%rdi
	push	%rsi
	push	%r8
	push	%r9
	push	%r10
	push	%r11
#endif

	/* arg1 = @field, arg2 = @fault, both read from the caller's stack. */
	mov	3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
	mov	2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1

	call	vmread_error_trampoline2

	/* Clear the @fault slot, it will be popped into the result register. */
	_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)

#ifdef CONFIG_X86_64
	pop	%r11
	pop	%r10
	pop	%r9
	pop	%r8
	pop	%rsi
	pop	%rdi
#endif
	pop	%_ASM_DX
	pop	%_ASM_CX
	pop	%_ASM_AX
	pop	%_ASM_BP

	RET
SYM_FUNC_END(vmread_error_trampoline)
#endif
|
|
|
|
|
2023-10-24 12:59:35 +02:00
|
|
|
/* Everything from here on is placed in plain .text, not .noinstr.text. */
.section .text, "ax"

/*
 * vmx_do_interrupt_irqoff - run an IRQ handler on a synthetic interrupt frame
 *
 * The handler is called through CALL_NOSPEC with _ASM_ARG1 as the operand,
 * i.e. the call target is taken from the first argument register.  The whole
 * body, RET included, comes from VMX_DO_EVENT_IRQOFF.
 */
SYM_FUNC_START(vmx_do_interrupt_irqoff)
	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
|