mmu.rs
use crate::{
    arch::kvm::vmx::ept::EptMapper,
    kdebug,
    libs::mutex::Mutex,
    mm::{page::PageFlags, syscall::ProtFlags},
    syscall::SystemError,
    virt::kvm::host_mem::{__gfn_to_pfn, kvm_vcpu_gfn_to_memslot, PAGE_MASK, PAGE_SHIFT},
};
use bitfield_struct::bitfield;

use super::{
    ept::check_ept_features,
    vcpu::VmxVcpu,
    vmcs::VmcsFields,
    vmx_asm_wrapper::{vmx_vmread, vmx_vmwrite},
};
use crate::arch::kvm::vmx::mmu::VmcsFields::CTRL_EPTP_PTR;
// pub const PT64_ROOT_LEVEL: u32 = 4;
// pub const PT32_ROOT_LEVEL: u32 = 2;
// pub const PT32E_ROOT_LEVEL: u32 = 3;

// pub struct KvmMmuPage{
//     gfn: u64,             // gfn of the start of the address range managed by this page
//     role: KvmMmuPageRole, // basic information such as hardware features and the page's level
//     // spt: *mut u64,     // spt: shadow page table; points to the struct page that holds all
//     //                    // page-table entries (ptes); page->private points back to this kvm_mmu_page
// }
#[bitfield(u32)]
pub struct KvmMmuPageRole {
    #[bits(4)]
    level: usize, // level of this page in the paging hierarchy
    cr4_pae: bool, // cr4.pae, 1 means 64-bit gptes are in use
    #[bits(2)]
    quadrant: usize, // with cr4.pae=0 a gpte is 32 bits but an spte is 64 bits, so several sptes are needed per gpte; this selects which part of the gpte this page covers
    direct: bool,
    #[bits(3)]
    access: usize, // access permissions
    invalid: bool, // invalid; destroyed as soon as it is unpinned
    nxe: bool,     // efer.nxe, no-execute
    cr0_wp: bool,  // cr0.wp, write protect
    smep_andnot_wp: bool, // smep && !cr0.wp; with SMEP enabled, code in kernel mode cannot execute instructions from user-mode pages
    smap_andnot_wp: bool, // smap && !cr0.wp
    #[bits(8)]
    unused: usize,
    #[bits(8)]
    smm: usize, // 1 if in system management mode, 0 otherwise
}
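// Illustrative only, assuming the accessors that the bitfield_struct crate generates
// for the fields above (a `new()` constructor plus `with_*` builders and getters);
// nothing in this module builds a role this way yet:
//     let role = KvmMmuPageRole::new().with_level(4).with_direct(true);
//     assert_eq!(role.level(), 4);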
// We don't want allocation failures within the mmu code, so we preallocate
// enough memory for a single page fault in a cache.
// pub struct KvmMmuMemoryCache {
//     num_objs: u32,
//     objs: [*mut u8; KVM_NR_MEM_OBJS as usize],
// }
#[derive(Default)]
pub struct KvmMmu {
    pub root_hpa: u64,
    pub root_level: u32,
    pub base_role: KvmMmuPageRole,
    // ...there are a few more fields whose purpose is not yet understood
    pub get_cr3: Option<fn(&VmxVcpu) -> u64>,
    pub set_eptp: Option<fn(u64) -> Result<(), SystemError>>,
    pub page_fault: Option<
        fn(
            vcpu: &mut VmxVcpu,
            gpa: u64,
            error_code: u32,
            prefault: bool,
        ) -> Result<(), SystemError>,
    >,
    // get_pdptr: Option<fn(&VmxVcpu, index: u32) -> u64>, // Page Directory Pointer Table Register; how it differs from CR3 is not yet clear
    // inject_page_fault: Option<fn(&mut VmxVcpu, fault: &X86Exception)>,
    // gva_to_gpa: Option<fn(&mut VmxVcpu, gva: u64, access: u32, exception: &X86Exception) -> u64>,
    // translate_gpa: Option<fn(&mut VmxVcpu, gpa: u64, access: u32, exception: &X86Exception) -> u64>,
    // sync_page: Option<fn(&mut VmxVcpu, &mut KvmMmuPage)>,
    // invlpg: Option<fn(&mut VmxVcpu, gva: u64)>, // invalidate an entry
    // update_pte: Option<fn(&mut VmxVcpu, sp: &KvmMmuPage, spte: u64, pte: u64)>,
}
impl core::fmt::Debug for KvmMmu {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("KvmMmu")
            .field("root_hpa", &self.root_hpa)
            .field("root_level", &self.root_level)
            .field("base_role", &self.base_role)
            .finish()
    }
}
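// With EPT enabled the guest owns CR3, so the TDP callback simply reads the
// GUEST_CR3 field back out of the current VMCS.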
fn tdp_get_cr3(_vcpu: &VmxVcpu) -> u64 {
    let guest_cr3 = vmx_vmread(VmcsFields::GUEST_CR3 as u32).expect("Failed to read GUEST_CR3");
    return guest_cr3;
}
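// EPTP layout (Intel SDM, "Extended-Page-Table Pointer"), for reference:
//   bits 2:0    EPT paging-structure memory type (0 = uncacheable, 6 = write-back)
//   bits 5:3    EPT page-walk length minus 1 (3 => 4-level paging)
//   bit  6      enable accessed/dirty flags (left clear here)
//   bits 12..   4-KiB-aligned physical address of the EPT PML4 table
// tdp_set_eptp below assembles this value from root_hpa with memory type 0 and a 4-level walk.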
fn tdp_set_eptp(root_hpa: u64) -> Result<(), SystemError> {
    // Permission bits are hard-coded for now: readable, writable and executable.
    // EPT paging-structure memory type: Uncacheable
    let mut eptp = 0x0 as u64;
    // This value is 1 less than the EPT page-walk length. 3 means 4-level paging.
    eptp |= 0x3 << 3;
    eptp |= root_hpa & (PAGE_MASK as u64);
    vmx_vmwrite(CTRL_EPTP_PTR as u32, eptp)?;
    Ok(())
}
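// Handle an EPT violation ("TDP page fault") on a guest-physical address.
// The error_code is assumed here to follow the x86 PFERR-style layout; only the
// write bit is consulted below:
//   bit 0  present, bit 1  write access, bit 2  user-mode access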
fn tdp_page_fault(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    error_code: u32,
    prefault: bool,
) -> Result<(), SystemError> {
    kdebug!("tdp_page_fault");
    let gfn = gpa >> PAGE_SHIFT; // shift the guest-physical address right by 12 bits to get the guest page frame number
    // Top up the memory caches in advance so that allocations cannot fail while the fault is handled.
    mmu_topup_memory_caches(vcpu)?;
    // TODO: determine the level used for this gfn and handle hugepages
    let level = 1; // 4KB page
    // TODO: fast path for violations caused by read/write access, i.e. non-mmio page faults that are present and writable
    // fast_page_fault(vcpu, gpa, level, error_code)
    // gfn -> pfn
    let mut map_writable = false;
    let write = error_code & ((1 as u32) << 1);
    let pfn = mmu_gfn_to_pfn_fast(vcpu, gpa, prefault, gfn, write == 0, &mut map_writable)?;
    // Direct map: install the gpa -> hpa translation in the EPT page tables.
    __direct_map(vcpu, gpa, write, map_writable, level, gfn, pfn, prefault)?;
    Ok(())
}
/*
 * Calculate mmu pages needed for kvm.
 */
// pub fn kvm_mmu_calculate_mmu_pages() -> u32 {
//     let mut nr_mmu_pages: u32;
//     let mut nr_pages = 0;
//     let kvm = vm(0).unwrap();
//     for as_id in 0..KVM_ADDRESS_SPACE_NUM {
//         let slots = kvm.memslots[as_id];
//         for i in 0..KVM_MEM_SLOTS_NUM {
//             let memslot = slots.memslots[i as usize];
//             nr_pages += memslot.npages;
//         }
//     }
//     nr_mmu_pages = (nr_pages as u32) * KVM_PERMILLE_MMU_PAGES / 1000;
//     nr_mmu_pages = nr_mmu_pages.max(KVM_MIN_ALLOC_MMU_PAGES);
//     return nr_mmu_pages;
// }

// pub fn kvm_mmu_change_mmu_pages(mut goal_nr_mmu_pages: u32) {
//     let kvm = KVM();
//     // free the surplus mmu pages
//     if kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
//         while kvm.lock().arch.n_used_mmu_pages > goal_nr_mmu_pages {
//             if !prepare_zap_oldest_mmu_page() {
//                 break;
//             }
//         }
//         // kvm_mmu_commit_zap_page();
//         goal_nr_mmu_pages = kvm.lock().arch.n_used_mmu_pages;
//     }
//     kvm.lock().arch.n_max_mmu_pages = goal_nr_mmu_pages;
// }

// pub fn prepare_zap_oldest_mmu_page() -> bool {
//     return false;
// }
pub fn kvm_mmu_setup(vcpu: &Mutex<VmxVcpu>) {
    // TODO: init_kvm_softmmu(vcpu), init_kvm_nested_mmu(vcpu)
    init_kvm_tdp_mmu(vcpu);
}

pub fn kvm_vcpu_mtrr_init(_vcpu: &Mutex<VmxVcpu>) -> Result<(), SystemError> {
    check_ept_features()?;
    Ok(())
}
pub fn init_kvm_tdp_mmu(vcpu: &Mutex<VmxVcpu>) {
    let context = &mut vcpu.lock().mmu;
    context.page_fault = Some(tdp_page_fault);
    context.get_cr3 = Some(tdp_get_cr3);
    context.set_eptp = Some(tdp_set_eptp);
    // context.inject_page_fault = kvm_inject_page_fault; TODO: inject_page_fault
    // context.invlpg = nonpaging_invlpg;
    // context.sync_page = nonpaging_sync_page;
    // context.update_pte = nonpaging_update_pte;

    // TODO: gva to gpa in kvm
    // if !is_paging(vcpu) { // paging disabled in the vcpu
    //     context.gva_to_gpa = nonpaging_gva_to_gpa;
    //     context.root_level = 0;
    // } else if is_long_mode(vcpu) {
    //     context.gva_to_gpa = paging64_gva_to_gpa;
    //     context.root_level = PT64_ROOT_LEVEL;
    // TODO: different paging strategies
    // } else if is_pae(vcpu) {
    //     context.gva_to_gpa = paging64_gva_to_gpa;
    //     context.root_level = PT32E_ROOT_LEVEL;
    // } else {
    //     context.gva_to_gpa = paging32_gva_to_gpa;
    //     context.root_level = PT32_ROOT_LEVEL;
    // }
}
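// Illustrative only (a sketch, not a call site in this module): once init_kvm_tdp_mmu
// has installed the callbacks above, an EPT-violation exit handler could dispatch roughly as
//     let mut vcpu = vcpu_mutex.lock();
//     if let Some(page_fault) = vcpu.mmu.page_fault {
//         page_fault(&mut vcpu, gpa, error_code, false)?;
//     }
// where `vcpu_mutex`, `gpa` and `error_code` come from the VM-exit context.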
pub fn __direct_map(
    vcpu: &mut VmxVcpu,
    gpa: u64,
    _write: u32,
    _map_writable: bool,
    _level: i32,
    _gfn: u64,
    pfn: u64,
    _prefault: bool,
) -> Result<u32, SystemError> {
    kdebug!("gpa={}, pfn={}, root_hpa={:x}", gpa, pfn, vcpu.mmu.root_hpa);
    // Check whether vcpu.mmu.root_hpa is valid
    if vcpu.mmu.root_hpa == 0 {
        return Err(SystemError::KVM_HVA_ERR_BAD);
    }
    // Map the gpa to an hpa through the EPT; 0x7 means readable, writable and executable
    let mut ept_mapper = EptMapper::lock();
    let page_flags = PageFlags::from_prot_flags(ProtFlags::from_bits_truncate(0x7 as u64), false);
    unsafe {
        assert!(ept_mapper.walk(gpa, pfn << PAGE_SHIFT, page_flags).is_ok());
    }
    drop(ept_mapper);
    return Ok(0);
}
pub fn mmu_gfn_to_pfn_fast(
    vcpu: &mut VmxVcpu,
    _gpa: u64,
    _prefault: bool,
    gfn: u64,
    write: bool,
    writable: &mut bool,
) -> Result<u64, SystemError> {
    let slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
    let pfn = __gfn_to_pfn(slot, gfn, false, write, writable)?;
    Ok(pfn)
}
// TODO: add the caches
pub fn mmu_topup_memory_caches(_vcpu: &mut VmxVcpu) -> Result<(), SystemError> {
    // If vcpu->arch.mmu_page_header_cache runs low, refill it from mmu_page_header_cache.
    // The two global slab caches, pte_list_desc_cache and mmu_page_header_cache, are created in kvm_mmu_module_init.
    // mmu_topup_memory_cache(vcpu.mmu_page_header_cache,
    //                        mmu_page_header_cache, 4);
    Ok(())
}