123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463 |
- use core::{fmt::Debug, sync::atomic::AtomicU32};
- use alloc::{boxed::Box, vec::Vec};
- use bit_field::BitField;
- use bitmap::{traits::BitMapOps, AllocBitmap};
- use system_error::SystemError;
- use x86::{
- bits64::rflags::RFlags,
- controlregs::{Cr0, Cr4},
- dtables::DescriptorTablePointer,
- };
- use x86_64::registers::control::EferFlags;
- use crate::{
- smp::cpu::ProcessorId,
- virt::vm::{
- kvm_host::{
- vcpu::VirtCpu, Vm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, KVM_USERSAPCE_IRQ_SOURCE_ID,
- },
- user_api::UapiKvmSegment,
- },
- };
- use crate::arch::VirtCpuArch;
- use super::{
- asm::{MsrData, VcpuSegment, VmxMsrEntry},
- vmx::{exit::ExitFastpathCompletion, vmx_info},
- x86_kvm_manager, x86_kvm_ops,
- };
- pub mod lapic;
- pub mod page;
- pub mod vcpu;
- #[allow(dead_code)]
- pub const TSS_IOPB_BASE_OFFSET: usize = 0x66;
- pub const TSS_BASE_SIZE: usize = 0x68;
- pub const TSS_IOPB_SIZE: usize = 65536 / 8;
- pub const TSS_REDIRECTION_SIZE: usize = 256 / 8;
- pub const RMODE_TSS_SIZE: usize = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1;
- pub const KVM_PFN_NOSLOT: u64 = 0x1 << 63;
- #[allow(dead_code)]
- #[derive(Debug, Default)]
- pub struct X86KvmArch {
- /// 中断芯片模式
- pub irqchip_mode: KvmIrqChipMode,
- /// 负责引导(bootstrap)kvm的vcpu_id
- bsp_vcpu_id: usize,
- pub pause_in_guest: bool,
- pub cstate_in_guest: bool,
- pub mwait_in_guest: bool,
- pub hlt_in_guest: bool,
- pub bus_lock_detection_enabled: bool,
- irq_sources_bitmap: u64,
- default_tsc_khz: u64,
- guest_can_read_msr_platform_info: bool,
- apicv_inhibit_reasons: usize,
- pub max_vcpu_ids: usize,
- pub notify_vmexit_flags: NotifyVmExitFlags,
- pub notify_window: u32,
- msr_fliter: Option<Box<KvmX86MsrFilter>>,
- pub noncoherent_dma_count: AtomicU32,
- pub active_mmu_pages: Vec<u64>,
- pub n_max_mmu_pages: usize,
- pub n_used_mmu_pages: usize,
- }
- impl X86KvmArch {
- pub fn init(kvm_type: usize) -> Result<Self, SystemError> {
- if kvm_type != 0 {
- return Err(SystemError::EINVAL);
- }
- let mut arch = x86_kvm_ops().vm_init();
- // 设置中断源位图
- arch.irq_sources_bitmap
- .set_bit(KVM_USERSAPCE_IRQ_SOURCE_ID, true)
- .set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, true);
- arch.default_tsc_khz = x86_kvm_manager().max_tsc_khz;
- arch.guest_can_read_msr_platform_info = true;
- arch.apicv_init();
- Ok(arch)
- }
- fn apicv_init(&mut self) {
- self.apicv_inhibit_reasons
- .set_bit(KvmApicvInhibit::ABSENT, true);
- if !vmx_info().enable_apicv {
- self.apicv_inhibit_reasons
- .set_bit(KvmApicvInhibit::DISABLE, true);
- }
- }
- pub fn msr_allowed(&self, msr: u32, ftype: MsrFilterType) -> bool {
- // x2APIC MSRs
- if (0x800..=0x8ff).contains(&msr) {
- return true;
- }
- if let Some(msr_filter) = &self.msr_fliter {
- let mut allowed = msr_filter.default_allow;
- for i in 0..msr_filter.count as usize {
- let range = &msr_filter.ranges[i];
- let start = range.base;
- let end = start + range.nmsrs;
- let flags = range.flags;
- let bitmap = &range.bitmap;
- if msr >= start && msr < end && flags.contains(ftype) {
- allowed = bitmap.get((msr - start) as usize).unwrap_or(false);
- break;
- }
- }
- return allowed;
- } else {
- return true;
- }
- }
- }
- #[derive(Debug, Clone, Copy, PartialEq)]
- #[allow(dead_code)]
- pub enum KvmIrqChipMode {
- None,
- Kernel,
- Split,
- }
- impl Default for KvmIrqChipMode {
- fn default() -> Self {
- Self::None
- }
- }
- #[allow(dead_code)]
- pub trait KvmInitFunc {
- fn hardware_setup(&self) -> Result<(), SystemError>;
- fn handle_intel_pt_intr(&self) -> u32;
- fn runtime_funcs(&self) -> &'static dyn KvmFunc;
- }
- pub trait KvmFunc: Send + Sync + Debug {
- /// 返回该硬件支持的名字,例如“Vmx”
- fn name(&self) -> &'static str;
- /// 启用硬件支持
- fn hardware_enable(&self) -> Result<(), SystemError>;
- fn vm_init(&self) -> X86KvmArch;
- fn vcpu_precreate(&self, vm: &mut Vm) -> Result<(), SystemError>;
- fn vcpu_create(&self, vcpu: &mut VirtCpu, vm: &Vm);
- fn vcpu_load(&self, vcpu: &mut VirtCpu, cpu: ProcessorId);
- fn load_mmu_pgd(&self, vcpu: &mut VirtCpu, vm: &Vm, root_hpa: u64, root_level: u32);
- fn cache_reg(&self, vcpu: &mut VirtCpuArch, reg: KvmReg);
- fn apicv_pre_state_restore(&self, vcpu: &mut VirtCpu);
- fn set_msr(&self, vcpu: &mut VirtCpu, msr: MsrData) -> Result<(), SystemError>;
- fn set_rflags(&self, vcpu: &mut VirtCpu, rflags: RFlags);
- fn get_rflags(&self, vcpu: &mut VirtCpu) -> RFlags;
- fn set_cr0(&self, vm: &Vm, vcpu: &mut VirtCpu, cr0: Cr0);
- fn is_vaild_cr0(&self, vcpu: &VirtCpu, cr0: Cr0) -> bool;
- fn set_cr4(&self, vcpu: &mut VirtCpu, cr4: Cr4);
- fn post_set_cr3(&self, vcpu: &VirtCpu, cr3: u64);
- fn is_vaild_cr4(&self, vcpu: &VirtCpu, cr4: Cr4) -> bool;
- fn set_efer(&self, vcpu: &mut VirtCpu, efer: EferFlags);
- fn set_segment(&self, vcpu: &mut VirtCpu, var: &mut UapiKvmSegment, seg: VcpuSegment);
- fn get_segment(
- &self,
- vcpu: &mut VirtCpu,
- var: UapiKvmSegment,
- seg: VcpuSegment,
- ) -> UapiKvmSegment;
- /// 这个函数不会用到VCPU,这里拿到只是为了确保上一层拿到锁
- fn get_idt(&self, _vcpu: &mut VirtCpu, dt: &mut DescriptorTablePointer<u8>);
- fn set_idt(&self, _vcpu: &mut VirtCpu, dt: &DescriptorTablePointer<u8>);
- fn get_gdt(&self, _vcpu: &mut VirtCpu, dt: &mut DescriptorTablePointer<u8>);
- fn set_gdt(&self, _vcpu: &mut VirtCpu, dt: &DescriptorTablePointer<u8>);
- fn update_exception_bitmap(&self, vcpu: &mut VirtCpu);
- fn vcpu_reset(&self, vcpu: &mut VirtCpu, vm: &Vm, init_event: bool);
- fn has_emulated_msr(&self, msr: u32) -> bool;
- fn get_msr_feature(&self, msr: &mut VmxMsrEntry) -> bool;
- fn prepare_switch_to_guest(&self, vcpu: &mut VirtCpu);
- fn flush_tlb_all(&self, vcpu: &mut VirtCpu);
- fn vcpu_run(&self, vcpu: &mut VirtCpu) -> ExitFastpathCompletion;
- fn handle_exit_irqoff(&self, vcpu: &mut VirtCpu);
- fn handle_exit(
- &self,
- vcpu: &mut VirtCpu,
- vm: &Vm,
- fastpath: ExitFastpathCompletion,
- ) -> Result<i32, SystemError>;
- }
- /// ## 中断抑制的原因位
- #[derive(Debug)]
- pub struct KvmApicvInhibit;
- #[allow(dead_code)]
- impl KvmApicvInhibit {
- // Intel与AMD共用
- /// APIC 加速功能被模块参数禁用,或者硬件不支持
- pub const DISABLE: usize = 0;
- /// Hyper-V 客户机正在使用 AutoEOI 功能,导致 APIC 加速被禁用。
- pub const HYPERV: usize = 1;
- /// 因为用户空间尚未启用内核或分裂的中断控制器,导致 APIC 加速被禁用。
- pub const ABSENT: usize = 2;
- /// KVM_GUESTDBG_BLOCKIRQ(一种调试措施,用于阻止该 vCPU 上的所有中断)被启用,以避免 AVIC/APICv 绕过此功能。
- pub const BLOCKIRQ: usize = 3;
- /// 当所有 vCPU 的 APIC ID 和 vCPU 的 1:1 映射被更改且 KVM 未应用其 x2APIC 热插拔修补程序时,APIC 加速被禁用。
- pub const PHYSICAL_ID_ALIASED: usize = 4;
- /// 当 vCPU 的 APIC ID 或 APIC 基址从其复位值更改时,首次禁用 APIC 加速。
- pub const APIC_ID_MODIFIED: usize = 5;
- /// 当 vCPU 的 APIC ID 或 APIC 基址从其复位值更改时,首次禁用 APIC 加速。
- pub const APIC_BASE_MODIFIED: usize = 6;
- // 仅仅对AMD适用
- /// 当 vCPU 运行嵌套客户机时,AVIC 被禁用。因为与 APICv 不同,当 vCPU 运行嵌套时,该 vCPU 的同级无法使用门铃机制通过 AVIC 信号中断。
- pub const NESTED: usize = 7;
- /// 在 SVM 上,等待 IRQ 窗口的实现使用挂起的虚拟中断,而在 KVM 等待 IRQ 窗口时无法注入这些虚拟中断,因此在等待 IRQ 窗口时 AVIC 被禁用。
- pub const IRQWIN: usize = 8;
- /// PIT(i8254)的“重新注入”模式依赖于 EOI 拦截,而 AVIC 不支持边沿触发中断的 EOI 拦截。
- pub const PIT_REINJ: usize = 9;
- /// SEV 不支持 AVIC,因此 AVIC 被禁用。
- pub const SEV: usize = 10;
- /// 当所有带有有效 LDR 的 vCPU 之间的逻辑 ID 和 vCPU 的 1:1 映射被更改时,AVIC 被禁用。
- pub const LOGICAL_ID_ALIASED: usize = 11;
- }
- #[derive(Debug)]
- pub struct KvmX86MsrFilter {
- count: u8,
- default_allow: bool,
- ranges: Vec<KernelMsrRange>,
- }
- #[derive(Debug)]
- pub struct KernelMsrRange {
- pub flags: MsrFilterType,
- pub nmsrs: u32,
- pub base: u32,
- pub bitmap: AllocBitmap,
- }
- #[repr(C)]
- #[allow(dead_code)]
- pub struct PosixMsrFilterRange {
- pub flags: u32,
- pub nmsrs: u32,
- pub base: u32,
- pub bitmap: *const u8,
- }
- bitflags! {
- pub struct MsrFilterType: u8 {
- const KVM_MSR_FILTER_READ = 1 << 0;
- const KVM_MSR_FILTER_WRITE = 1 << 1;
- }
- pub struct NotifyVmExitFlags: u8 {
- const KVM_X86_NOTIFY_VMEXIT_ENABLED = 1 << 0;
- const KVM_X86_NOTIFY_VMEXIT_USER = 1 << 1;
- }
- }
- impl Default for NotifyVmExitFlags {
- fn default() -> Self {
- NotifyVmExitFlags::empty()
- }
- }
- #[derive(Debug, Clone, Copy)]
- pub enum KvmReg {
- VcpuRegsRax = 0,
- VcpuRegsRcx = 1,
- VcpuRegsRdx = 2,
- VcpuRegsRbx = 3,
- VcpuRegsRsp = 4,
- VcpuRegsRbp = 5,
- VcpuRegsRsi = 6,
- VcpuRegsRdi = 7,
- VcpuRegsR8 = 8,
- VcpuRegsR9 = 9,
- VcpuRegsR10 = 10,
- VcpuRegsR11 = 11,
- VcpuRegsR12 = 12,
- VcpuRegsR13 = 13,
- VcpuRegsR14 = 14,
- VcpuRegsR15 = 15,
- VcpuRegsRip = 16,
- NrVcpuRegs = 17,
- //VcpuExregPdptr = NrVcpuRegs,
- VcpuExregCr0,
- VcpuExregCr3,
- VcpuExregCr4,
- VcpuExregRflags,
- VcpuExregSegments,
- VcpuExregExitInfo1, //EXITINFO1 provides the linear address of the memory operand.
- VcpuExregExitInfo2, //EXITINFO2 provides the contents of the register operand.
- }
- bitflags! {
- pub struct HFlags: u8 {
- const HF_GUEST_MASK = 1 << 0; /* VCPU is in guest-mode */
- const HF_SMM_MASK = 1 << 1;
- const HF_SMM_INSIDE_NMI_MASK = 1 << 2;
- }
- }
- /// ### 虚拟机的通用寄存器
- #[derive(Debug, Default, Clone, Copy)]
- #[repr(C)]
- pub struct KvmCommonRegs {
- rax: u64,
- rbx: u64,
- rcx: u64,
- rdx: u64,
- rsi: u64,
- rdi: u64,
- rsp: u64,
- rbp: u64,
- r8: u64,
- r9: u64,
- r10: u64,
- r11: u64,
- r12: u64,
- r13: u64,
- r14: u64,
- r15: u64,
- rip: u64,
- rflags: u64,
- }
- impl Vm {
- pub fn vcpu_precreate(&mut self, id: usize) -> Result<(), SystemError> {
- if self.arch.max_vcpu_ids == 0 {
- self.arch.max_vcpu_ids = 1024 * 4;
- }
- if id >= self.arch.max_vcpu_ids {
- return Err(SystemError::EINVAL);
- }
- return x86_kvm_ops().vcpu_precreate(self);
- }
- }
- bitflags! {
- pub struct EmulType: u32 {
- const NO_DECODE = 1 << 0;
- const TRAP_UD = 1 << 1;
- const SKIP = 1 << 2;
- const ALLOW_RETRY_PF = 1 << 3;
- const TRAP_UD_FORCED = 1 << 4;
- const VMWARE_GP = 1 << 5;
- const PF = 1 << 6;
- const COMPLETE_USER_EXIT = 1 << 7;
- const WRITE_PF_TO_SP = 1 << 8;
- }
- }
- #[allow(dead_code)]
- #[derive(Default, Debug)]
- ///用于跟踪和记录VCPU的各种统计信息。
- pub struct KvmVcpuStat {
- //pub generic: KvmVcpuStatGeneric,
- pub pf_taken: u64,
- pub pf_fixed: u64,
- pub pf_emulate: u64,
- pub pf_spurious: u64,
- pub pf_fast: u64,
- pub pf_mmio_spte_created: u64,
- pub pf_guest: u64,
- pub tlb_flush: u64,
- pub invlpg: u64,
- pub exits: u64,
- pub io_exits: u64,
- pub mmio_exits: u64,
- pub signal_exits: u64,
- pub irq_window_exits: u64,
- pub nmi_window_exits: u64,
- pub l1d_flush: u64,
- pub halt_exits: u64,
- pub request_irq_exits: u64,
- pub irq_exits: u64,
- pub host_state_reload: u64,
- pub fpu_reload: u64,
- pub insn_emulation: u64,
- pub insn_emulation_fail: u64,
- pub hypercalls: u64,
- pub irq_injections: u64,
- pub nmi_injections: u64,
- pub req_event: u64,
- pub nested_run: u64,
- pub directed_yield_attempted: u64,
- pub directed_yield_successful: u64,
- pub preemption_reported: u64,
- pub preemption_other: u64,
- pub guest_mode: u64,
- pub notify_window_exits: u64,
- }
- #[inline]
- /// 将 GFN 转换为 GPA
- pub fn gfn_to_gpa(gfn: u64) -> u64 {
- gfn << 12
- }
- #[allow(dead_code)]
- #[inline]
- /// 将 GPA 转换为 GFN
- pub fn gpa_to_gfn(gfn: u64) -> u64 {
- gfn >> 12
- }
|