
pub mod clock;
pub mod completion;
pub mod cputime;
pub mod fair;
pub mod idle;
pub mod pelt;
pub mod prio;
pub mod syscall;

use core::{
    intrinsics::{likely, unlikely},
    sync::atomic::{compiler_fence, fence, AtomicUsize, Ordering},
};

use alloc::{
    boxed::Box,
    collections::LinkedList,
    sync::{Arc, Weak},
    vec::Vec,
};

use system_error::SystemError;

use crate::{
    arch::{interrupt::ipi::send_ipi, CurrentIrqArch},
    exception::{
        ipi::{IpiKind, IpiTarget},
        InterruptArch,
    },
    libs::{
        lazy_init::Lazy,
        spinlock::{SpinLock, SpinLockGuard},
    },
    mm::percpu::{PerCpu, PerCpuVar},
    process::{ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState, SchedInfo},
    sched::idle::IdleScheduler,
    smp::{core::smp_get_processor_id, cpu::ProcessorId},
    time::{clocksource::HZ, timer::clock},
};

use self::{
    clock::{ClockUpdataFlag, SchedClock},
    cputime::{irq_time_read, CpuTimeFunc, IrqTime},
    fair::{CfsRunQueue, CompletelyFairScheduler, FairSchedEntity},
    prio::PrioUtil,
};
static mut CPU_IRQ_TIME: Option<Vec<&'static mut IrqTime>> = None;

// Although the run queue is per-CPU, load balancing needs to modify a remote
// CPU's rq, so it still has to be protected by a lock.
static CPU_RUNQUEUE: Lazy<PerCpuVar<Arc<CpuRunQueue>>> = PerCpuVar::define_lazy();

/// Running total of runnable tasks across all CPUs in the system.
static CALCULATE_LOAD_TASKS: AtomicUsize = AtomicUsize::new(0);
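// The global load is folded once every LOAD_FREQ ticks, i.e. roughly every five
// seconds plus one tick (the extra tick presumably staggers the update so it does
// not line up exactly with other five-second periodic work).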
const LOAD_FREQ: usize = HZ as usize * 5 + 1;

pub const SCHED_FIXEDPOINT_SHIFT: u64 = 10;
#[allow(dead_code)]
pub const SCHED_FIXEDPOINT_SCALE: u64 = 1 << SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SHIFT: u64 = SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SCALE: u64 = 1 << SCHED_CAPACITY_SHIFT;
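// Load weights and CPU capacities share the same 10-bit fixed-point representation:
// one "unit" of load or one fully available CPU corresponds to
// SCHED_FIXEDPOINT_SCALE / SCHED_CAPACITY_SCALE (1 << 10 = 1024), so, for example,
// half of one CPU's capacity would be expressed as 512 (following the Linux convention).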
#[inline]
pub fn cpu_irq_time(cpu: ProcessorId) -> &'static mut IrqTime {
    unsafe { CPU_IRQ_TIME.as_mut().unwrap()[cpu.data() as usize] }
}

#[inline]
pub fn cpu_rq(cpu: usize) -> Arc<CpuRunQueue> {
    CPU_RUNQUEUE.ensure();
    unsafe {
        CPU_RUNQUEUE
            .get()
            .force_get(ProcessorId::new(cpu as u32))
            .clone()
    }
}
lazy_static! {
    pub static ref SCHED_FEATURES: SchedFeature = SchedFeature::GENTLE_FAIR_SLEEPERS
        | SchedFeature::START_DEBIT
        | SchedFeature::LAST_BUDDY
        | SchedFeature::CACHE_HOT_BUDDY
        | SchedFeature::WAKEUP_PREEMPTION
        | SchedFeature::NONTASK_CAPACITY
        | SchedFeature::TTWU_QUEUE
        | SchedFeature::SIS_UTIL
        | SchedFeature::RT_PUSH_IPI
        | SchedFeature::ALT_PERIOD
        | SchedFeature::BASE_SLICE
        | SchedFeature::UTIL_EST
        | SchedFeature::UTIL_EST_FASTUP;
}
pub trait Scheduler {
    /// ## Called when a task enters the runnable state. It places the scheduling
    /// entity (task) into the red-black tree and increments `nr_running`.
    fn enqueue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag);

    /// ## Called when a task is no longer runnable; the corresponding scheduling
    /// entity is removed from the red-black tree and `nr_running` is decremented.
    fn dequeue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag);

    /// ## Voluntarily yield the CPU. This behaves essentially as a dequeue
    /// immediately followed by an enqueue.
    fn yield_task(rq: &mut CpuRunQueue);

    /// ## Check whether a task that just became runnable can preempt the
    /// currently running task.
    fn check_preempt_currnet(
        rq: &mut CpuRunQueue,
        pcb: &Arc<ProcessControlBlock>,
        flags: WakeupFlags,
    );

    /// ## Pick the most suitable task to run next.
    #[allow(dead_code)]
    fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<ProcessControlBlock>>;

    /// ## Pick the most suitable task to run next.
    fn pick_next_task(
        rq: &mut CpuRunQueue,
        pcb: Option<Arc<ProcessControlBlock>>,
    ) -> Option<Arc<ProcessControlBlock>>;

    /// ## Called from the timer tick; it may trigger a process switch and drives
    /// run-time preemption.
    fn tick(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, queued: bool);

    /// ## Called at fork time if the new task should join CFS.
    fn task_fork(pcb: Arc<ProcessControlBlock>);

    fn put_prev_task(rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>);
}
/// Scheduling policy
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SchedPolicy {
    /// Real-time scheduling
    RT,
    /// First-in, first-out scheduling
    FIFO,
    /// Completely fair scheduling
    CFS,
    /// IDLE
    IDLE,
}
#[allow(dead_code)]
pub struct TaskGroup {
    /// Scheduling entities managed by CFS, one per CPU
    entitys: Vec<Arc<FairSchedEntity>>,
    /// Per-CPU CFS run queues
    cfs: Vec<Arc<CfsRunQueue>>,
    /// Parent group
    parent: Option<Arc<TaskGroup>>,
    shares: u64,
}
#[derive(Debug, Default)]
pub struct LoadWeight {
    /// Load weight
    pub weight: u64,
    /// Cached reciprocal of `weight`, used to speed up calculations
    pub inv_weight: u32,
}
impl LoadWeight {
    /// Used to keep weights within a reasonable range
    pub const SCHED_FIXEDPOINT_SHIFT: u32 = 10;

    pub const WMULT_SHIFT: u32 = 32;
    pub const WMULT_CONST: u32 = !0;

    pub const NICE_0_LOAD_SHIFT: u32 = Self::SCHED_FIXEDPOINT_SHIFT + Self::SCHED_FIXEDPOINT_SHIFT;

    pub fn update_load_add(&mut self, inc: u64) {
        self.weight += inc;
        self.inv_weight = 0;
    }

    pub fn update_load_sub(&mut self, dec: u64) {
        self.weight -= dec;
        self.inv_weight = 0;
    }

    pub fn update_load_set(&mut self, weight: u64) {
        self.weight = weight;
        self.inv_weight = 0;
    }

    /// ## Update the cached reciprocal of the load weight
    pub fn update_inv_weight(&mut self) {
        // Already up to date
        if likely(self.inv_weight != 0) {
            return;
        }

        let w = Self::scale_load_down(self.weight);

        if unlikely(w >= Self::WMULT_CONST as u64) {
            // The weight has bits above 32 set
            self.inv_weight = 1;
        } else if unlikely(w == 0) {
            // Use the maximum value as the reciprocal
            self.inv_weight = Self::WMULT_CONST;
        } else {
            // Compute the reciprocal
            self.inv_weight = Self::WMULT_CONST / w as u32;
        }
    }
    /// ## Compute a task's weighted execution-time delta
    ///
    /// Formula: `(delta_exec * (weight * self.inv_weight)) >> WMULT_SHIFT`
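    ///
    /// Since `inv_weight` approximates `WMULT_CONST / self.weight`, the result is
    /// effectively `delta_exec * weight / self.weight`. Illustrative example (not
    /// taken from the original source): if `self.weight` is twice `weight`, a raw
    /// delta of 4000 ns maps to roughly 2000 ns of weighted time, up to
    /// fixed-point rounding.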
    pub fn calculate_delta(&mut self, delta_exec: u64, weight: u64) -> u64 {
        // Reduce precision
        let mut fact = Self::scale_load_down(weight);

        // Record the high 32 bits of `fact`
        let mut fact_hi = (fact >> 32) as u32;

        // Used to compensate the final shift
        let mut shift = Self::WMULT_SHIFT;

        self.update_inv_weight();

        if unlikely(fact_hi != 0) {
            // The high 32 bits still contain data:
            // find the highest set bit, then shrink `fact` accordingly
            let fs = 32 - fact_hi.leading_zeros();
            shift -= fs;

            // Make sure the high 32 bits are all zero
            fact >>= fs;
        }

        // At this point `fact` is guaranteed to fit in 32 bits
        fact *= self.inv_weight as u64;

        fact_hi = (fact >> 32) as u32;

        if fact_hi != 0 {
            // The high 32 bits still contain data:
            // find the highest set bit, then shrink `fact` accordingly
            let fs = 32 - fact_hi.leading_zeros();
            shift -= fs;

            // Make sure the high 32 bits are all zero
            fact >>= fs;
        }

        return ((delta_exec as u128 * fact as u128) >> shift) as u64;
    }
    /// ## Scale the load weight down into a smaller range, i.e. compute at reduced precision
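    ///
    /// For example, `scale_load_down(1024 << SCHED_FIXEDPOINT_SHIFT)` yields 1024,
    /// and any nonzero weight is clamped to at least 2 so that its cached
    /// reciprocal stays usable.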
    pub const fn scale_load_down(mut weight: u64) -> u64 {
        if weight != 0 {
            weight >>= Self::SCHED_FIXEDPOINT_SHIFT;

            if weight < 2 {
                weight = 2;
            }
        }
        weight
    }

    #[allow(dead_code)]
    pub const fn scale_load(weight: u64) -> u64 {
        weight << Self::SCHED_FIXEDPOINT_SHIFT
    }
}
pub trait SchedArch {
    /// Enable scheduling on the current core
    fn enable_sched_local();
    /// Disable scheduling on the current core
    #[allow(dead_code)]
    fn disable_sched_local();

    /// Perform initialization before scheduling is enabled for the first time.
    ///
    /// Note that this differs from `sched_init`: this function only does work such
    /// as initializing the clock.
    fn initial_setup_sched_local() {}
}
/// ## Per-CPU run queue, holding the run queues of the individual schedulers
#[allow(dead_code)]
#[derive(Debug)]
pub struct CpuRunQueue {
    lock: SpinLock<()>,
    lock_on_who: AtomicUsize,

    cpu: ProcessorId,
    clock_task: u64,
    clock: u64,
    prev_irq_time: u64,
    clock_updata_flags: ClockUpdataFlag,

    /// Overloaded (set once this rq has two or more runnable tasks)
    overload: bool,

    next_balance: u64,

    /// Number of runnable tasks
    nr_running: usize,

    /// Number of blocked (uninterruptible) tasks
    nr_uninterruptible: usize,

    /// Time of the next global load update
    cala_load_update: usize,
    cala_load_active: usize,

    /// CFS run queue
    cfs: Arc<CfsRunQueue>,

    clock_pelt: u64,
    lost_idle_time: u64,
    clock_idle: u64,

    cfs_tasks: LinkedList<Arc<FairSchedEntity>>,

    /// Most recent scheduling statistics
    sched_info: SchedInfo,

    /// The process currently running on this run queue
    current: Weak<ProcessControlBlock>,

    idle: Weak<ProcessControlBlock>,
}
impl CpuRunQueue {
    pub fn new(cpu: ProcessorId) -> Self {
        Self {
            lock: SpinLock::new(()),
            lock_on_who: AtomicUsize::new(usize::MAX),
            cpu,
            clock_task: 0,
            clock: 0,
            prev_irq_time: 0,
            clock_updata_flags: ClockUpdataFlag::empty(),
            overload: false,
            next_balance: 0,
            nr_running: 0,
            nr_uninterruptible: 0,
            cala_load_update: (clock() + (5 * HZ + 1)) as usize,
            cala_load_active: 0,
            cfs: Arc::new(CfsRunQueue::new()),
            clock_pelt: 0,
            lost_idle_time: 0,
            clock_idle: 0,
            cfs_tasks: LinkedList::new(),
            sched_info: SchedInfo::default(),
            current: Weak::new(),
            idle: Weak::new(),
        }
    }
    /// This function must only be called with interrupts disabled!
    ///
    /// It returns a mutable reference to the rq. Note that the second return value
    /// (the lock guard, if any) must be kept alive for as long as the reference is
    /// used, so the function is effectively unsafe and the caller must guarantee
    /// correctness. In interrupt context, with interrupts disabled, it is safe to use.
    pub fn self_lock(&self) -> (&mut Self, Option<SpinLockGuard<()>>) {
        if self.lock.is_locked()
            && smp_get_processor_id().data() as usize == self.lock_on_who.load(Ordering::SeqCst)
        {
            // The lock is already held on this CPU, so the reference can be taken directly
            (
                unsafe {
                    (self as *const Self as usize as *mut Self)
                        .as_mut()
                        .unwrap()
                },
                None,
            )
        } else {
            // Otherwise take the lock first
            let guard = self.lock();
            (
                unsafe {
                    (self as *const Self as usize as *mut Self)
                        .as_mut()
                        .unwrap()
                },
                Some(guard),
            )
        }
    }

    fn lock(&self) -> SpinLockGuard<()> {
        let guard = self.lock.lock_irqsave();

        // Record which CPU holds the lock
        self.lock_on_who
            .store(smp_get_processor_id().data() as usize, Ordering::SeqCst);

        guard
    }
    pub fn enqueue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag) {
        if !flags.contains(EnqueueFlag::ENQUEUE_NOCLOCK) {
            self.update_rq_clock();
        }

        if !flags.contains(EnqueueFlag::ENQUEUE_RESTORE) {
            let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave();
            if sched_info.last_queued == 0 {
                sched_info.upgrade().last_queued = self.clock;
            }
        }

        match pcb.sched_info().policy() {
            SchedPolicy::CFS => CompletelyFairScheduler::enqueue(self, pcb, flags),
            SchedPolicy::FIFO => todo!(),
            SchedPolicy::RT => todo!(),
            SchedPolicy::IDLE => IdleScheduler::enqueue(self, pcb, flags),
        }

        // TODO: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c#239
    }
    pub fn dequeue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) {
        // TODO: sched_core

        if !flags.contains(DequeueFlag::DEQUEUE_NOCLOCK) {
            self.update_rq_clock()
        }

        if !flags.contains(DequeueFlag::DEQUEUE_SAVE) {
            let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave();

            if sched_info.last_queued > 0 {
                let delta = self.clock - sched_info.last_queued;

                let mut sched_info = sched_info.upgrade();
                sched_info.last_queued = 0;
                sched_info.run_delay += delta as usize;
                self.sched_info.run_delay += delta as usize;
            }
        }

        match pcb.sched_info().policy() {
            SchedPolicy::CFS => CompletelyFairScheduler::dequeue(self, pcb, flags),
            SchedPolicy::FIFO => todo!(),
            SchedPolicy::RT => todo!(),
            SchedPolicy::IDLE => IdleScheduler::dequeue(self, pcb, flags),
        }
    }
    /// Activate a task: it will be added to the queue
    pub fn activate_task(&mut self, pcb: &Arc<ProcessControlBlock>, mut flags: EnqueueFlag) {
        if *pcb.sched_info().on_rq.lock_irqsave() == OnRq::Migrating {
            flags |= EnqueueFlag::ENQUEUE_MIGRATED;
        }

        if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
            todo!()
        }

        self.enqueue_task(pcb.clone(), flags);

        *pcb.sched_info().on_rq.lock_irqsave() = OnRq::Queued;
        pcb.sched_info().set_on_cpu(Some(self.cpu));
    }
    /// Check whether the given task may preempt the currently running task
    #[allow(clippy::comparison_chain)]
    pub fn check_preempt_currnet(&mut self, pcb: &Arc<ProcessControlBlock>, flags: WakeupFlags) {
        if pcb.sched_info().policy() == self.current().sched_info().policy() {
            match self.current().sched_info().policy() {
                SchedPolicy::CFS => {
                    CompletelyFairScheduler::check_preempt_currnet(self, pcb, flags)
                }
                SchedPolicy::FIFO => todo!(),
                SchedPolicy::RT => todo!(),
                SchedPolicy::IDLE => IdleScheduler::check_preempt_currnet(self, pcb, flags),
            }
        } else if pcb.sched_info().policy() < self.current().sched_info().policy() {
            // The waking task belongs to a higher-priority scheduling class
            self.resched_current();
        }

        if *self.current().sched_info().on_rq.lock_irqsave() == OnRq::Queued
            && self.current().flags().contains(ProcessFlags::NEED_SCHEDULE)
        {
            self.clock_updata_flags
                .insert(ClockUpdataFlag::RQCF_REQ_SKIP);
        }
    }
    /// Deactivate a task: it will be removed from the queue
    pub fn deactivate_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) {
        *pcb.sched_info().on_rq.lock_irqsave() = if flags.contains(DequeueFlag::DEQUEUE_SLEEP) {
            OnRq::None
        } else {
            OnRq::Migrating
        };

        self.dequeue_task(pcb, flags);
    }

    #[inline]
    pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
        self.cfs.clone()
    }
    /// Update the rq clock
    pub fn update_rq_clock(&mut self) {
        // This clock update should be skipped
        if self
            .clock_updata_flags
            .contains(ClockUpdataFlag::RQCF_ACT_SKIP)
        {
            return;
        }

        let clock = SchedClock::sched_clock_cpu(self.cpu);
        if clock < self.clock {
            return;
        }

        let delta = clock - self.clock;
        self.clock += delta;
        // error!("clock {}", self.clock);
        self.update_rq_clock_task(delta);
    }
    /// Update the task clock (`clock_task`)
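    ///
    /// Unlike `clock`, `clock_task` excludes time spent servicing interrupts: the
    /// IRQ-time delta accumulated since the last update is subtracted from the raw
    /// clock delta before `clock_task` advances, so per-task runtime accounting is
    /// not inflated by interrupt work.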
    pub fn update_rq_clock_task(&mut self, mut delta: u64) {
        let mut irq_delta = irq_time_read(self.cpu) - self.prev_irq_time;
        // if self.cpu == 0 {
        //     error!(
        //         "cpu 0 delta {delta} irq_delta {} irq_time_read(self.cpu) {} self.prev_irq_time {}",
        //         irq_delta,
        //         irq_time_read(self.cpu),
        //         self.prev_irq_time
        //     );
        // }
        compiler_fence(Ordering::SeqCst);

        if irq_delta > delta {
            irq_delta = delta;
        }

        self.prev_irq_time += irq_delta;
        delta -= irq_delta;

        // todo: psi?
        // send_to_default_serial8250_port(format!("\n{delta}\n",).as_bytes());
        compiler_fence(Ordering::SeqCst);
        self.clock_task += delta;
        compiler_fence(Ordering::SeqCst);
        // if self.cpu == 0 {
        //     error!("cpu {} clock_task {}", self.cpu, self.clock_task);
        // }
        // todo: pelt?
    }
    /// Compute the change in this rq's number of active (runnable plus
    /// uninterruptible) tasks since the last fold
    fn calculate_load_fold_active(&mut self, adjust: usize) -> usize {
        let mut nr_active = self.nr_running - adjust;
        nr_active += self.nr_uninterruptible;
        let mut delta = 0;

        if nr_active != self.cala_load_active {
            delta = nr_active - self.cala_load_active;
            self.cala_load_active = nr_active;
        }

        delta
    }

    /// ## Fold this rq's contribution into the global load, called from the tick
    pub fn calculate_global_load_tick(&mut self) {
        if clock() < self.cala_load_update as u64 {
            // Not yet due for the next update, so return immediately
            return;
        }

        let delta = self.calculate_load_fold_active(0);

        if delta != 0 {
            CALCULATE_LOAD_TASKS.fetch_add(delta, Ordering::SeqCst);
        }

        self.cala_load_update += LOAD_FREQ;
    }
    pub fn add_nr_running(&mut self, nr_running: usize) {
        let prev = self.nr_running;

        self.nr_running = prev + nr_running;

        if prev < 2 && self.nr_running >= 2 && !self.overload {
            self.overload = true;
        }
    }

    pub fn sub_nr_running(&mut self, count: usize) {
        self.nr_running -= count;
    }

    /// Is this rq running only idle-policy tasks?
    pub fn sched_idle_rq(&self) -> bool {
        return unlikely(
            self.nr_running == self.cfs.idle_h_nr_running as usize && self.nr_running > 0,
        );
    }
    #[inline]
    pub fn current(&self) -> Arc<ProcessControlBlock> {
        self.current.upgrade().unwrap()
    }

    #[inline]
    pub fn set_current(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.current = pcb;
    }

    #[inline]
    pub fn set_idle(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.idle = pcb;
    }

    #[inline]
    pub fn clock_task(&self) -> u64 {
        self.clock_task
    }
    /// Request a reschedule of the currently running process
    pub fn resched_current(&self) {
        let current = self.current();

        // Already marked for rescheduling?
        if unlikely(current.flags().contains(ProcessFlags::NEED_SCHEDULE)) {
            return;
        }

        let cpu = self.cpu;

        if cpu == smp_get_processor_id() {
            // assert!(
            //     Arc::ptr_eq(&current, &ProcessManager::current_pcb()),
            //     "rq current name {} process current {}",
            //     current.basic().name().to_string(),
            //     ProcessManager::current_pcb().basic().name().to_string(),
            // );

            // Mark the current process as needing a reschedule
            ProcessManager::current_pcb()
                .flags()
                .insert(ProcessFlags::NEED_SCHEDULE);
            return;
        }

        // Send a reschedule IPI to the target CPU
        send_resched_ipi(cpu);
    }
    /// Pick the next task to run
    pub fn pick_next_task(&mut self, prev: Arc<ProcessControlBlock>) -> Arc<ProcessControlBlock> {
        if likely(prev.sched_info().policy() >= SchedPolicy::CFS)
            && self.nr_running == self.cfs.h_nr_running as usize
        {
            let p = CompletelyFairScheduler::pick_next_task(self, Some(prev.clone()));

            if let Some(pcb) = p.as_ref() {
                return pcb.clone();
            } else {
                // error!(
                //     "pick idle cfs rq {:?}",
                //     self.cfs_rq()
                //         .entities
                //         .iter()
                //         .map(|x| x.1.pid)
                //         .collect::<Vec<_>>()
                // );
                match prev.sched_info().policy() {
                    SchedPolicy::FIFO => todo!(),
                    SchedPolicy::RT => todo!(),
                    SchedPolicy::CFS => CompletelyFairScheduler::put_prev_task(self, prev),
                    SchedPolicy::IDLE => IdleScheduler::put_prev_task(self, prev),
                }
                // CFS has nothing runnable, so fall back to the idle task
                return self.idle.upgrade().unwrap();
            }
        }

        todo!()
    }
}
bitflags! {
    pub struct SchedFeature: u32 {
        /// Only grant sleepers 50% of their service deficit, so a task woken from
        /// sleep gets some credit but cannot hog the CPU.
        const GENTLE_FAIR_SLEEPERS = 1 << 0;
        /// Place new tasks ahead so that already-running tasks are not starved.
        const START_DEBIT = 1 << 1;
        /// Prefer the task that was woken last, since it is likely to touch the
        /// data of its waker, improving cache locality.
        const NEXT_BUDDY = 1 << 2;
        /// Prefer the task that ran last, since it is likely to touch the same
        /// data again, improving cache locality.
        const LAST_BUDDY = 1 << 3;
        /// Treat a task's buddy as cache-hot and avoid migrating it, improving
        /// cache locality.
        const CACHE_HOT_BUDDY = 1 << 4;
        /// Allow a wakeup to preempt the current task.
        const WAKEUP_PREEMPTION = 1 << 5;
        /// Reduce a CPU's capacity based on time not spent running tasks.
        const NONTASK_CAPACITY = 1 << 6;
        /// Queue remote wakeups on the target CPU and handle them with a scheduler
        /// IPI, reducing run queue lock contention.
        const TTWU_QUEUE = 1 << 7;
        /// Limit pointless scanning of the last-level cache (LLC) domain on wakeup.
        const SIS_UTIL = 1 << 8;
        /// Use IPIs when migrating RT (real-time) tasks to reduce lock contention
        /// between CPUs.
        const RT_PUSH_IPI = 1 << 9;
        /// Enable estimated CPU utilization for scheduling decisions.
        const UTIL_EST = 1 << 10;
        const UTIL_EST_FASTUP = 1 << 11;
        /// Enable the alternative scheduling period.
        const ALT_PERIOD = 1 << 12;
        /// Enable the base time slice.
        const BASE_SLICE = 1 << 13;
    }

    pub struct EnqueueFlag: u8 {
        const ENQUEUE_WAKEUP = 0x01;
        const ENQUEUE_RESTORE = 0x02;
        const ENQUEUE_MOVE = 0x04;
        const ENQUEUE_NOCLOCK = 0x08;

        const ENQUEUE_MIGRATED = 0x40;

        const ENQUEUE_INITIAL = 0x80;
    }

    pub struct DequeueFlag: u8 {
        const DEQUEUE_SLEEP = 0x01;
        const DEQUEUE_SAVE = 0x02; /* Matches ENQUEUE_RESTORE */
        const DEQUEUE_MOVE = 0x04; /* Matches ENQUEUE_MOVE */
        const DEQUEUE_NOCLOCK = 0x08; /* Matches ENQUEUE_NOCLOCK */
    }

    pub struct WakeupFlags: u8 {
        /* Wake flags. The first three directly map to some SD flag value */
        const WF_EXEC = 0x02; /* Wakeup after exec; maps to SD_BALANCE_EXEC */
        const WF_FORK = 0x04; /* Wakeup after fork; maps to SD_BALANCE_FORK */
        const WF_TTWU = 0x08; /* Wakeup; maps to SD_BALANCE_WAKE */

        const WF_SYNC = 0x10; /* Waker goes to sleep after wakeup */
        const WF_MIGRATED = 0x20; /* Internal use, task got migrated */
        const WF_CURRENT_CPU = 0x40; /* Prefer to move the wakee to the current CPU. */
    }

    pub struct SchedMode: u8 {
        /*
         * Constants for the sched_mode argument of __schedule().
         *
         * The mode argument allows RT enabled kernels to differentiate a
         * preemption from blocking on an 'sleeping' spin/rwlock. Note that
         * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
         * optimize the AND operation out and just check for zero.
         */
        /// The task is not re-enqueued during the switch and must be woken up manually.
        const SM_NONE = 0x0;
        /// The task is re-enqueued: the current process was preempted (e.g. by the tick).
        const SM_PREEMPT = 0x1;
        /// RT-lock related.
        const SM_RTLOCK_WAIT = 0x2;
        /// By default identical to SM_PREEMPT.
        const SM_MASK_PREEMPT = Self::SM_PREEMPT.bits;
    }
}
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OnRq {
    Queued,
    Migrating,
    None,
}

impl ProcessManager {
    pub fn update_process_times(user_tick: bool) {
        let pcb = Self::current_pcb();
        CpuTimeFunc::irqtime_account_process_tick(&pcb, user_tick, 1);
        scheduler_tick();
    }
}
/// ## Called on every clock tick
pub fn scheduler_tick() {
    fence(Ordering::SeqCst);
    // Get the current CPU index
    let cpu_idx = smp_get_processor_id().data() as usize;

    // Get the current CPU's run queue
    let rq = cpu_rq(cpu_idx);

    let (rq, guard) = rq.self_lock();

    // Get the task currently running on this run queue
    let current = rq.current();

    // Update the run queue clock
    rq.update_rq_clock();

    match current.sched_info().policy() {
        SchedPolicy::CFS => CompletelyFairScheduler::tick(rq, current, false),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::RT => todo!(),
        SchedPolicy::IDLE => IdleScheduler::tick(rq, current, false),
    }

    rq.calculate_global_load_tick();

    drop(guard);
    // TODO: load balancing
}
/// ## Perform scheduling
///
/// Panics if `preempt_count` is not zero.
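///
/// Typical usage (illustrative, not an exhaustive list): a task that has marked
/// itself as sleeping calls `schedule(SchedMode::SM_NONE)` to give up the CPU,
/// while preemption paths go through [`__schedule`] with `SchedMode::SM_PREEMPT`
/// so that the previous task stays enqueued.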
#[inline]
pub fn schedule(sched_mod: SchedMode) {
    let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
    assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
    __schedule(sched_mod);
}
/// ## Perform scheduling
///
/// Unlike `schedule`, this function does not check `preempt_count`, which makes it
/// suitable for contexts such as the timer interrupt.
pub fn __schedule(sched_mod: SchedMode) {
    let cpu = smp_get_processor_id().data() as usize;
    let rq = cpu_rq(cpu);

    let mut prev = rq.current();
    if let ProcessState::Exited(_) = prev.clone().sched_info().inner_lock_read_irqsave().state() {
        // We entered __schedule from exit()
        prev = ProcessManager::current_pcb();
    }

    // TODO: hrtick_clear(rq);

    let (rq, _guard) = rq.self_lock();

    rq.clock_updata_flags = ClockUpdataFlag::from_bits_truncate(rq.clock_updata_flags.bits() << 1);

    rq.update_rq_clock();
    rq.clock_updata_flags = ClockUpdataFlag::RQCF_UPDATE;

    // kBUG!(
    //     "before cfs rq pcbs {:?}\nvruntimes {:?}\n",
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.pcb().pid() })
    //         .collect::<Vec<_>>(),
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.vruntime })
    //         .collect::<Vec<_>>(),
    // );
    // warn!(
    //     "before cfs rq {:?} prev {:?}",
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.pcb().pid() })
    //         .collect::<Vec<_>>(),
    //     prev.pid()
    // );

    // error!("prev pid {:?} {:?}", prev.pid(), prev.sched_info().policy());
    if !sched_mod.contains(SchedMode::SM_MASK_PREEMPT)
        && prev.sched_info().policy() != SchedPolicy::IDLE
        && prev.sched_info().inner_lock_read_irqsave().is_mark_sleep()
    {
        // warn!("deactivate_task prev {:?}", prev.pid());
        // TODO: signals need to be handled here
        // https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c?r=&mo=172979&fi=6578#6630
        rq.deactivate_task(
            prev.clone(),
            DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
        );
    }

    let next = rq.pick_next_task(prev.clone());

    // kBUG!(
    //     "after cfs rq pcbs {:?}\nvruntimes {:?}\n",
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.pcb().pid() })
    //         .collect::<Vec<_>>(),
    //     rq.cfs
    //         .entities
    //         .iter()
    //         .map(|x| { x.1.vruntime })
    //         .collect::<Vec<_>>(),
    // );

    // error!("next {:?}", next.pid());

    prev.flags().remove(ProcessFlags::NEED_SCHEDULE);
    fence(Ordering::SeqCst);
    if likely(!Arc::ptr_eq(&prev, &next)) {
        rq.set_current(Arc::downgrade(&next));
        // warn!(
        //     "switch_process prev {:?} next {:?} sched_mode {sched_mod:?}",
        //     prev.pid(),
        //     next.pid()
        // );

        // send_to_default_serial8250_port(
        //     format!(
        //         "switch_process prev {:?} next {:?} sched_mode {sched_mod:?}\n",
        //         prev.pid(),
        //         next.pid()
        //     )
        //     .as_bytes(),
        // );

        // CurrentApic.send_eoi();
        compiler_fence(Ordering::SeqCst);

        unsafe { ProcessManager::switch_process(prev, next) };
    } else {
        assert!(
            Arc::ptr_eq(&ProcessManager::current_pcb(), &prev),
            "{}",
            ProcessManager::current_pcb().basic().name()
        );
    }
}
pub fn sched_fork(pcb: &Arc<ProcessControlBlock>) -> Result<(), SystemError> {
    let mut prio_guard = pcb.sched_info().prio_data.write_irqsave();
    let current = ProcessManager::current_pcb();

    prio_guard.prio = current.sched_info().prio_data.read_irqsave().normal_prio;

    if PrioUtil::dl_prio(prio_guard.prio) {
        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
    } else if PrioUtil::rt_prio(prio_guard.prio) {
        let policy = &pcb.sched_info().sched_policy;
        *policy.write_irqsave() = SchedPolicy::RT;
    } else {
        let policy = &pcb.sched_info().sched_policy;
        *policy.write_irqsave() = SchedPolicy::CFS;
    }

    pcb.sched_info()
        .sched_entity()
        .force_mut()
        .init_entity_runnable_average();

    Ok(())
}
pub fn sched_cgroup_fork(pcb: &Arc<ProcessControlBlock>) {
    __set_task_cpu(pcb, smp_get_processor_id());
    match pcb.sched_info().policy() {
        SchedPolicy::RT => todo!(),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::CFS => CompletelyFairScheduler::task_fork(pcb.clone()),
        SchedPolicy::IDLE => todo!(),
    }
}

fn __set_task_cpu(pcb: &Arc<ProcessControlBlock>, cpu: ProcessorId) {
    // TODO: Fixme, group scheduling is not implemented yet
    let se = pcb.sched_info().sched_entity();
    let rq = cpu_rq(cpu.data() as usize);
    se.force_mut().set_cfs(Arc::downgrade(&rq.cfs));
}
#[inline(never)]
pub fn sched_init() {
    // Initialize the per-CPU variables
    unsafe {
        CPU_IRQ_TIME = Some(Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize));
        CPU_IRQ_TIME
            .as_mut()
            .unwrap()
            .resize_with(PerCpu::MAX_CPU_NUM as usize, || Box::leak(Box::default()));

        let mut cpu_runqueue = Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize);
        for cpu in 0..PerCpu::MAX_CPU_NUM as usize {
            let rq = Arc::new(CpuRunQueue::new(ProcessorId::new(cpu as u32)));
            rq.cfs.force_mut().set_rq(Arc::downgrade(&rq));
            cpu_runqueue.push(rq);
        }

        CPU_RUNQUEUE.init(PerCpuVar::new(cpu_runqueue).unwrap());
    };
}
#[inline]
pub fn send_resched_ipi(cpu: ProcessorId) {
    send_ipi(IpiKind::KickCpu, IpiTarget::Specified(cpu));
}
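// A minimal sanity-check sketch (not from the original source) for the fixed-point
// helpers in `LoadWeight`. It assumes the crate's unit tests can be built and run
// host-side, which may not hold for this kernel target; treat it as illustrative.
#[cfg(test)]
mod load_weight_tests {
    use super::LoadWeight;

    #[test]
    fn scale_round_trip_and_clamp() {
        // scale_load/scale_load_down are inverses up to the fixed-point shift.
        assert_eq!(LoadWeight::scale_load_down(LoadWeight::scale_load(1024)), 1024);
        // A tiny but nonzero weight is clamped to 2 so its reciprocal stays usable.
        assert_eq!(LoadWeight::scale_load_down(1), 2);
        assert_eq!(LoadWeight::scale_load_down(0), 0);
    }

    #[test]
    fn delta_roughly_tracks_weight_ratio() {
        // With equal weights, the weighted delta stays very close to the raw delta
        // (slightly below it because the cached reciprocal is truncated).
        let mut lw = LoadWeight {
            weight: LoadWeight::scale_load(1024),
            inv_weight: 0,
        };
        let delta = lw.calculate_delta(1_000_000, LoadWeight::scale_load(1024));
        assert!((999_000..=1_000_000).contains(&delta));
    }
}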