mmu_internal.rs

use crate::{arch::vm::mmu::kvm_mmu::PAGE_SHIFT, mm::page::EntryFlags};
use alloc::sync::Arc;
use core::{intrinsics::unlikely, ops::Index};
use log::{debug, warn};
use system_error::SystemError;
use x86::vmx::vmcs::{guest, host};

use crate::{
    arch::{
        vm::{
            asm::VmxAsm,
            kvm_host::{EmulType, KVM_PFN_NOSLOT},
            mmu::kvm_mmu::{PFRet, PageLevel},
            mtrr::kvm_mtrr_check_gfn_range_consistency,
            vmx::{ept::EptPageMapper, PageFaultErr},
        },
        MMArch,
    },
    mm::PhysAddr,
    virt::vm::kvm_host::{
        mem::{LockedKvmMemSlot, LockedVmMemSlotSet, UserMemRegionFlag, __gfn_to_pfn_memslot},
        search_memslots,
        vcpu::VirtCpu,
        Vm,
    },
};

use super::kvm_mmu::{gfn_round_for_level, is_tdp_mmu_enabled, KvmMmuPageRole};
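/// Metadata for a single shadow / TDP page-table page. Loosely modeled on
/// Linux KVM's `struct kvm_mmu_page`; several fields are still unused.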
#[allow(dead_code)]
#[derive(Debug, Default)]
pub struct KvmMmuPage {
    pub tdp_mmu_page: bool, // whether this is a TDP (Two-Dimensional Paging) page-table page
    pub gfn: u64,           // guest frame number (GFN)
    /*
     * The following two entries are used to key the shadow page in the
     * hash table. (Their exact use here is not yet clear.)
     */
    pub role: KvmMmuPageRole,
    pub spt: u64, // pointer to the page-table entries (SPTEs)

    pub mmu_seq: u64,
    pub map_writable: bool,
    pub write_fault_to_shadow_pgtable: bool,
}
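/// All state for one guest page fault, carried through the whole handling
/// path (`do_page_fault` -> `tdp_page_fault` -> `tdp_mmu_map`). Modeled on
/// Linux KVM's `struct kvm_page_fault`.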
#[allow(dead_code)]
#[derive(Debug, Default)]
pub struct KvmPageFault {
    // Arguments to vcpu.do_page_fault.
    // `addr` is the GPA passed in from the guest OS.
    addr: PhysAddr,
    error_code: u32,
    prefetch: bool,

    // Derived from error_code.
    exec: bool,
    write: bool,
    present: bool,
    rsvd: bool,
    user: bool,

    // Derived from the MMU and global state.
    is_tdp: bool,
    nx_huge_page_workaround_enabled: bool,

    // Whether a mapping larger than 4KB may be created, or whether it is
    // disallowed because of NX huge pages.
    huge_page_disallowed: bool,
    // The largest page size this fault can create.
    max_level: u8,
    // The page size that can be created based on max_level and the page size
    // used by the host mapping.
    req_level: u8,
    // The page size that will be created based on req_level and
    // huge_page_disallowed.
    goal_level: u8,

    // The shifted addr, or the result of the guest page-table walk if addr
    // is a GVA.
    gfn: u64, // gfn_t is usually a 64-bit address
    // The memslot containing gfn. May be None.
    slot: Option<Arc<LockedKvmMemSlot>>,

    // Outputs of kvm_faultin_pfn.
    mmu_seq: u64,
    // kvm_pfn_t is usually a 64-bit address; it effectively tells us the HPA.
    pfn: u64,
    hva: u64, // hva_t is usually a 64-bit address
    map_writable: bool,

    // Indicates the guest is trying to write to a gfn containing one or more
    // PTEs that are used to translate the write itself.
    write_fault_to_shadow_pgtable: bool,
}
#[allow(dead_code)]
impl KvmPageFault {
    pub fn pfn(&self) -> u64 {
        self.pfn
    }

    pub fn gfn(&self) -> u64 {
        self.gfn
    }

    pub fn gpa(&self) -> u64 {
        self.addr.data() as u64
    }

    pub fn hva(&self) -> u64 {
        self.hva
    }
}
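// Page-fault handling flow for the TDP/EPT case:
//   page_fault() -> do_page_fault() -> tdp_page_fault()
//     -> kvm_tdp_mmu_page_fault() -> kvm_faultin_pfn() + tdp_mmu_map()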
impl VirtCpu {
    #[inline(never)]
    pub fn page_fault(
        &mut self,
        vm: &Vm,
        cr2_or_gpa: u64,
        mut error_code: u64,
        _insn: Option<u64>,
        _insn_len: usize,
    ) -> Result<i32, SystemError> {
        let emulation_type = EmulType::PF;
        let _direct = self.arch.mmu().root_role.get_direct();

        if error_code & PageFaultErr::PFERR_IMPLICIT_ACCESS.bits() != 0 {
            warn!("Implicit access error code detected");
            error_code &= !PageFaultErr::PFERR_IMPLICIT_ACCESS.bits();
        }

        //if self.arch.mmu().root.hpa != KvmMmu::INVALID_PAGE {
        //    return Ok(PFRet::Retry as u64);
        //}

        let mut r = PFRet::Invalid;
        if unlikely(error_code & PageFaultErr::PFERR_RSVD.bits() != 0) {
            todo!();
            // r = self.handle_mmio_page_fault(cr2_or_gpa, direct)?;
            // if r == PFRet::Emulate {
            //     return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len);
            // }
        }

        if r == PFRet::Invalid {
            r = self
                .do_page_fault(
                    vm,
                    cr2_or_gpa,
                    (error_code & 0xFFFFFFFF) as u32,
                    false,
                    emulation_type,
                )?
                .into();
            if r == PFRet::Invalid {
                return Err(SystemError::EIO);
            }
        }

        if i32::from(r.clone()) < 0 {
            return Ok(i32::from(r));
        }
        if r != PFRet::Emulate {
            return Ok(1);
        }

        // Before emulating the instruction, check whether the error code was
        // due to a read-only (RO) violation while translating a guest page.
        // This can happen with nested virtualization plus nested paging; if
        // so, we simply unprotect the page and resume the guest.
        let pferr_nested_guest_page = PageFaultErr::PFERR_GUEST_PAGE
            | PageFaultErr::PFERR_WRITE
            | PageFaultErr::PFERR_PRESENT;
        if self.arch.mmu().root_role.get_direct()
            && (error_code & pferr_nested_guest_page.bits()) == pferr_nested_guest_page.bits()
        {
            todo!()
        }

        // self.arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
        // optimistically try to unprotect the page and let the processor
        // re-execute the faulting instruction. Retrying MMIO emulation is not
        // allowed, as it is not just pointless but can also lead to an
        // infinite loop: the processor would keep faulting on the
        // non-existent MMIO address. Retrying an instruction from a nested
        // guest is also pointless and dangerous, since we only explicitly
        // shadow L1's page tables, i.e. unprotecting on behalf of L1 does not
        // magically fix whatever made L2 fail.
        // if !self.mmio_info_in_cache(cr2_or_gpa, direct) && !self.arch.is_guest_mode() {
        //     emulation_type |= EmulType::ALLOW_RETRY_PF;
        // }
        // self.emulate_instruction(cr2_or_gpa, emulation_type, insn, insn_len)
        todo!("emulate_instruction")
    }
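    /// Builds a `KvmPageFault` from the raw error code, resolves the gfn and
    /// memslot for the direct-map case, dispatches to the TDP handler and
    /// updates the per-vCPU fault statistics.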
    fn do_page_fault(
        &mut self,
        vm: &Vm,
        cr2_or_gpa: u64,
        error_code: u32,
        prefetch: bool,
        mut emultype: EmulType,
    ) -> Result<i32, SystemError> {
        // Initialize the page fault.
        let mut page_fault = KvmPageFault {
            addr: PhysAddr::new(cr2_or_gpa as usize),
            error_code,
            exec: error_code & PageFaultErr::PFERR_FETCH.bits() as u32 != 0,
            write: error_code & PageFaultErr::PFERR_WRITE.bits() as u32 != 0,
            present: error_code & PageFaultErr::PFERR_PRESENT.bits() as u32 != 0,
            rsvd: error_code & PageFaultErr::PFERR_RSVD.bits() as u32 != 0,
            user: error_code & PageFaultErr::PFERR_USER.bits() as u32 != 0,
            prefetch,
            is_tdp: true,
            nx_huge_page_workaround_enabled: false, // todo
            max_level: PageLevel::Level1G as u8,
            req_level: PageLevel::Level4K as u8,
            goal_level: PageLevel::Level4K as u8,
            ..Default::default()
        };

        // Handle the direct-map case.
        if self.arch.mmu().root_role.get_direct() {
            page_fault.gfn = (page_fault.addr.data() >> PAGE_SHIFT) as u64;
            debug!("page_fault.addr.data() : 0x{:x}", page_fault.addr.data());
            debug!("do_page_fault : gfn = 0x{:x}", page_fault.gfn);
            // kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); incomplete
            page_fault.slot = self.gfn_to_memslot(page_fault.gfn, vm);
        }

        // Async page faults (Async #PF), also known as prefetch faults, are
        // not faults from the guest's point of view and were already counted
        // when the original fault occurred.
        if !prefetch {
            self.stat.pf_taken += 1;
        }

        let r = if page_fault.is_tdp {
            self.tdp_page_fault(vm, &mut page_fault).unwrap()
        } else {
            // tdp_page_fault is currently the only page-fault handler, so
            // this branch is never taken.
            let handle = self.arch.mmu().page_fault.unwrap();
            handle(self, &page_fault).unwrap()
        };

        if page_fault.write_fault_to_shadow_pgtable {
            emultype |= EmulType::WRITE_PF_TO_SP;
        }

        // Similar to the above, prefetch faults are not truly spurious, and
        // the async page-fault path does not do emulation. Faults fixed by
        // the async page-fault handler must still be counted, however,
        // otherwise they would never be counted at all.
        match PFRet::from(r) {
            PFRet::Fixed => self.stat.pf_fixed += 1,
            PFRet::Emulate => self.stat.pf_emulate += 1,
            PFRet::Spurious => self.stat.pf_spurious += 1,
            _ => {}
        }
        debug!("do_page_fault return r = {}", r);
        Ok(r)
    }
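    /// Looks up the memslot containing `gfn` in this vCPU's memslot set.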
    pub fn gfn_to_memslot(&self, gfn: u64, vm: &Vm) -> Option<Arc<LockedKvmMemSlot>> {
        let slot_set: Arc<LockedVmMemSlotSet> = self.kvm_vcpu_memslots(vm);
        // ...todo
        search_memslots(slot_set, gfn)
    }

    pub fn kvm_vcpu_memslots(&self, vm: &Vm) -> Arc<LockedVmMemSlotSet> {
        vm.memslots.index(0).clone()
    }
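    /// TDP (EPT) page-fault entry: lowers `max_level` until the memory types
    /// across the candidate huge-page range are consistent, then hands the
    /// fault to the TDP MMU.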
    fn tdp_page_fault(
        &mut self,
        vm: &Vm,
        page_fault: &mut KvmPageFault,
    ) -> Result<i32, SystemError> {
        // If shadow_memtype_mask is set and the VM has non-coherent DMA:
        //if shadow_memtype_mask != 0 && self.kvm().lock().arch.noncoherent_dma_count > 0 {
        while page_fault.max_level > PageLevel::Level4K as u8 {
            let page_num = PageLevel::kvm_pages_per_hpage(page_fault.max_level);
            // Align down to the base of the range.
            let base = gfn_round_for_level(page_fault.gfn, page_fault.max_level);
            // Check that the memory types are consistent across the given
            // GFN range (not yet implemented).
            if kvm_mtrr_check_gfn_range_consistency(self, base, page_num) {
                break;
            }
            page_fault.max_level -= 1;
        }
        //}

        if is_tdp_mmu_enabled() {
            return self.kvm_tdp_mmu_page_fault(vm, page_fault);
        }

        // Normally unreachable, since we support EPT.
        self.direct_page_fault(page_fault)
    }
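    /// Resolves the faulting gfn to a host pfn and, if that succeeds,
    /// installs the mapping in the EPT via `tdp_mmu_map`.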
    fn kvm_tdp_mmu_page_fault(
        &self,
        vm: &Vm,
        page_fault: &mut KvmPageFault,
    ) -> Result<i32, SystemError> {
        //page_fault_handle_page_track(page_fault)
        //fast_page_fault(page_fault);
        //mmu_topup_memory_caches(false);
        let mut r = self
            .kvm_faultin_pfn(vm, page_fault, 1 | 1 << 1 | 1 << 2)
            .unwrap();
        if r != PFRet::Continue {
            return Ok(r.into());
        }

        //r = PFRet::Retry;
        //if self.is_page_fault_stale(page_fault) { return; }

        // The actual mapping.
        r = self.tdp_mmu_map(page_fault)?.into();
        Ok(r.into())
    }
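    /// Maps `page_fault.gpa()` in the EPT with fixed rwx permissions. This is
    /// a minimal mapping step: it always installs a 4KB mapping and does not
    /// yet update any SPTE bookkeeping.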
    // Huge-page support is not implemented yet.
    fn tdp_mmu_map(&self, page_fault: &mut KvmPageFault) -> Result<i32, SystemError> {
        // let ret = PFRet::Retry; // The logic below differs from Linux and
        // may cause problems when the return value is checked.
        let mut mapper = EptPageMapper::lock();
        debug!("{:?}", &page_fault);
        // flags: rwx
        let page_flags: EntryFlags<MMArch> = unsafe { EntryFlags::from_data(0xb77) };
        mapper.map(PhysAddr::new(page_fault.gpa() as usize), page_flags);
        //debug_eptp();
        debug!("The ept_root_addr is {:?}", EptPageMapper::root_page_addr());
        // todo: update some of the remaining state
        Ok(PFRet::Fixed.into())
        //todo!()
    }
    fn direct_page_fault(&self, _page_fault: &KvmPageFault) -> Result<i32, SystemError> {
        todo!()
    }
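    /// Snapshots the VM's `mmu_invalidate_seq` into the fault (used by the
    /// currently commented-out staleness check) before doing the gfn -> pfn
    /// translation. The `_access` mask is accepted but not used yet.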
    fn kvm_faultin_pfn(
        &self,
        vm: &Vm,
        page_fault: &mut KvmPageFault,
        _access: u32,
    ) -> Result<PFRet, SystemError> {
        page_fault.mmu_seq = vm.mmu_invalidate_seq;
        self.__kvm_faultin_pfn(page_fault)
    }
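    /// Validates the memslot (an invalid slot forces a retry; private slots
    /// are hidden from L2), then translates the gfn to a pfn via
    /// `__gfn_to_pfn_memslot`, filling in `pfn`, `hva` and `map_writable`.
    /// The async page-fault path is still commented out.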
    fn __kvm_faultin_pfn(&self, page_fault: &mut KvmPageFault) -> Result<PFRet, SystemError> {
        let slot = &page_fault.slot;
        let mut is_async = false;

        if slot.is_none() {
            return Err(SystemError::KVM_HVA_ERR_BAD);
        }
        let slot = slot.as_ref().unwrap().read();

        if slot.get_flags().bits() & UserMemRegionFlag::KVM_MEMSLOT_INVALID.bits() != 0 {
            return Ok(PFRet::Retry);
        }

        if !slot.is_visible() {
            /* Do not expose private memslots to L2. */
            if self.arch.is_guest_mode() {
                drop(slot);
                page_fault.slot = None;
                page_fault.pfn = KVM_PFN_NOSLOT;
                page_fault.map_writable = false;
                return Ok(PFRet::Continue);
            }
        }

        // Try to translate the GFN to a PFN.
        let guest_cr3 = VmxAsm::vmx_vmread(guest::CR3);
        let host_cr3 = VmxAsm::vmx_vmread(host::CR3);
        debug!("guest_cr3={:x}, host_cr3={:x}", guest_cr3, host_cr3);
        page_fault.pfn = __gfn_to_pfn_memslot(
            Some(&slot),
            page_fault.gfn,
            (false, &mut is_async),
            false,
            page_fault.write,
            &mut page_fault.map_writable,
            &mut page_fault.hva,
        )?;
        if !is_async {
            return Ok(PFRet::Continue); /* *pfn already holds the correct page */
        }

        // if !page_fault.prefetch && self.kvm_can_do_async_pf() {
        //     self.trace_kvm_try_async_get_page(page_fault.addr, page_fault.gfn);
        //     if self.kvm_find_async_pf_gfn(page_fault.gfn) {
        //         self.trace_kvm_async_pf_repeated_fault(page_fault.addr, page_fault.gfn);
        //         self.kvm_make_request(KVM_REQ_APF_HALT);
        //         return Ok(PFRet::Retry);
        //     } else if self.kvm_arch_setup_async_pf(page_fault.addr, page_fault.gfn) {
        //         return Ok(PFRet::Retry);
        //     }
        // }

        Ok(PFRet::Continue)
    }
}