#[cfg(test)]
use crate::hal::VirtAddr;
use crate::hal::{BufferDirection, Dma, Hal};
use crate::transport::Transport;
use crate::{align_up, Error, Result, PAGE_SIZE};
use bitflags::bitflags;
#[cfg(test)]
use core::cmp::min;
use core::hint::spin_loop;
use core::mem::size_of;
use core::ptr::{self, addr_of_mut, NonNull};
use core::sync::atomic::{fence, Ordering};

/// The mechanism for bulk data transport on virtio devices.
///
/// Each device can have zero or more virtqueues.
#[derive(Debug)]
pub struct VirtQueue<H: Hal> {
    /// DMA guard
    dma: Dma<H>,
    /// Descriptor table
    desc: NonNull<[Descriptor]>,
    /// Available ring
    avail: NonNull<AvailRing>,
    /// Used ring
    used: NonNull<UsedRing>,
    /// The index of the queue.
    queue_idx: u16,
    /// The size of the queue.
    ///
    /// This is both the number of descriptors, and the number of slots in the available and used
    /// rings.
    queue_size: u16,
    /// The number of descriptors currently in use.
    num_used: u16,
    /// The head desc index of the free list.
    free_head: u16,
    /// The driver's free-running available index (the value written to `AvailRing::idx`).
    avail_idx: u16,
    /// The used ring index up to which entries have already been popped.
    last_used_idx: u16,
}

impl<H: Hal> VirtQueue<H> {
    /// Create a new VirtQueue.
    pub fn new<T: Transport>(transport: &mut T, idx: u16, size: u16) -> Result<Self> {
        if transport.queue_used(idx) {
            return Err(Error::AlreadyUsed);
        }
        if !size.is_power_of_two() || transport.max_queue_size() < size as u32 {
            return Err(Error::InvalidParam);
        }

        let layout = VirtQueueLayout::new(size);
        // Allocate contiguous pages.
        let dma = Dma::new(layout.size / PAGE_SIZE)?;

        transport.queue_set(
            idx,
            size as u32,
            dma.paddr(),
            dma.paddr() + layout.avail_offset,
            dma.paddr() + layout.used_offset,
        );

        let desc = NonNull::new(ptr::slice_from_raw_parts_mut(
            dma.vaddr() as *mut Descriptor,
            size as usize,
        ))
        .unwrap();
        let avail = NonNull::new((dma.vaddr() + layout.avail_offset) as *mut AvailRing).unwrap();
        let used = NonNull::new((dma.vaddr() + layout.used_offset) as *mut UsedRing).unwrap();

        // Link descriptors together.
        for i in 0..(size - 1) {
            // Safe because `desc` is properly aligned, dereferenceable, initialised, and the device
            // won't access the descriptors for the duration of this unsafe block.
            unsafe {
                (*desc.as_ptr())[i as usize].next = i + 1;
            }
        }

        Ok(VirtQueue {
            dma,
            desc,
            avail,
            used,
            queue_size: size,
            queue_idx: idx,
            num_used: 0,
            free_head: 0,
            avail_idx: 0,
            last_used_idx: 0,
        })
    }

    /// Adds buffers to the virtqueue, returning a token.
    ///
    /// Ref: linux virtio_ring.c virtqueue_add
    ///
    /// # Safety
    ///
    /// The input and output buffers must remain valid until the token is returned by `pop_used`.
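    ///
    /// # Example
    ///
    /// A sketch of the calling pattern (not compiled here; `queue` is assumed to be an
    /// already-constructed `VirtQueue`):
    ///
    /// ```ignore
    /// let request = [0x55u8; 8]; // device-readable input
    /// let mut response = [0u8; 16]; // device-writable output
    /// // SAFETY: `request` and `response` stay alive until `pop_used` returns this token.
    /// let token = unsafe { queue.add(&[&request], &[&mut response]) }?;
    /// ```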
    pub unsafe fn add(&mut self, inputs: &[*const [u8]], outputs: &[*mut [u8]]) -> Result<u16> {
        if inputs.is_empty() && outputs.is_empty() {
            return Err(Error::InvalidParam);
        }
        if inputs.len() + outputs.len() + self.num_used as usize > self.queue_size as usize {
            return Err(Error::QueueFull);
        }

        // allocate descriptors from free list
        let head = self.free_head;
        let mut last = self.free_head;

        // Safe because self.desc is properly aligned, dereferenceable and initialised, and nothing
        // else reads or writes the free descriptors during this block.
        unsafe {
            for (buffer, direction) in input_output_iter(inputs, outputs) {
                let desc = self.desc_ptr(self.free_head);
                (*desc).set_buf::<H>(buffer, direction, DescFlags::NEXT);
                last = self.free_head;
                self.free_head = (*desc).next;
            }

            // set last_elem.next = NULL
            (*self.desc_ptr(last)).flags.remove(DescFlags::NEXT);
        }

        self.num_used += (inputs.len() + outputs.len()) as u16;

        let avail_slot = self.avail_idx & (self.queue_size - 1);
        // Safe because self.avail is properly aligned, dereferenceable and initialised.
        unsafe {
            (*self.avail.as_ptr()).ring[avail_slot as usize] = head;
        }

        // Write barrier so that device sees changes to descriptor table and available ring before
        // change to available index.
        fence(Ordering::SeqCst);

        // increase head of avail ring
        self.avail_idx = self.avail_idx.wrapping_add(1);
        // Safe because self.avail is properly aligned, dereferenceable and initialised.
        unsafe {
            (*self.avail.as_ptr()).idx = self.avail_idx;
        }

        // Write barrier so that device can see change to available index after this method returns.
        fence(Ordering::SeqCst);

        Ok(head)
    }

    /// Adds the given buffers to the virtqueue, notifies the device, blocks until the device uses
    /// them, then pops them.
    ///
    /// This assumes that the device isn't processing any other buffers at the same time.
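    ///
    /// # Example
    ///
    /// A sketch of a blocking request/response exchange (not compiled here; `MyHal` and
    /// `transport` stand in for a real `Hal` implementation and `Transport`):
    ///
    /// ```ignore
    /// let mut queue = VirtQueue::<MyHal>::new(&mut transport, 0, 16)?;
    /// let request = [0x01u8, 0x02];
    /// let mut response = [0u8; 32];
    /// let written = queue.add_notify_wait_pop(&[&request], &[&mut response], &mut transport)?;
    /// // `written` is the number of bytes the device reported writing, here into `response`.
    /// ```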
    pub fn add_notify_wait_pop(
        &mut self,
        inputs: &[*const [u8]],
        outputs: &[*mut [u8]],
        transport: &mut impl Transport,
    ) -> Result<u32> {
        // Safe because we don't return until the same token has been popped, so they remain valid
        // until then.
        let token = unsafe { self.add(inputs, outputs) }?;

        // Notify the queue.
        transport.notify(self.queue_idx);

        // Wait until there is at least one element in the used ring.
        while !self.can_pop() {
            spin_loop();
        }

        self.pop_used(token)
    }

    /// Returns a non-null pointer to the descriptor at the given index.
    fn desc_ptr(&mut self, index: u16) -> *mut Descriptor {
        // Safe because self.desc is properly aligned and dereferenceable.
        unsafe { addr_of_mut!((*self.desc.as_ptr())[index as usize]) }
    }

    /// Returns whether there is a used element that can be popped.
    pub fn can_pop(&self) -> bool {
        // Read barrier, so we read a fresh value from the device.
        fence(Ordering::SeqCst);

        // Safe because self.used points to a valid, aligned, initialised, dereferenceable, readable
        // instance of UsedRing.
        self.last_used_idx != unsafe { (*self.used.as_ptr()).idx }
    }

    /// Returns the descriptor index (a.k.a. token) of the next used element without popping it, or
    /// `None` if the used ring is empty.
    pub fn peek_used(&self) -> Option<u16> {
        if self.can_pop() {
            let last_used_slot = self.last_used_idx & (self.queue_size - 1);
            // Safe because self.used points to a valid, aligned, initialised, dereferenceable,
            // readable instance of UsedRing.
            Some(unsafe { (*self.used.as_ptr()).ring[last_used_slot as usize].id as u16 })
        } else {
            None
        }
    }

    /// Returns the number of free descriptors.
    pub fn available_desc(&self) -> usize {
        (self.queue_size - self.num_used) as usize
    }

    /// Recycles the descriptor chain starting at `head`.
    ///
    /// This pushes all linked descriptors onto the front of the free list.
    fn recycle_descriptors(&mut self, mut head: u16) {
        let original_free_head = self.free_head;
        self.free_head = head;
        loop {
            let desc = self.desc_ptr(head);
            // Safe because self.desc is properly aligned, dereferenceable and initialised, and
            // nothing else reads or writes the descriptor during this block.
            unsafe {
                self.num_used -= 1;
                if let Some(next) = (*desc).next() {
                    head = next;
                } else {
                    (*desc).next = original_free_head;
                    return;
                }
            }
        }
    }

    /// If the given token is next on the device used queue, pops it and returns the total buffer
    /// length which was used (written) by the device.
    ///
    /// Ref: linux virtio_ring.c virtqueue_get_buf_ctx
    pub fn pop_used(&mut self, token: u16) -> Result<u32> {
        if !self.can_pop() {
            return Err(Error::NotReady);
        }
        // Read barrier not necessary, as can_pop already has one.

        // Get the index of the start of the descriptor chain for the next element in the used ring.
        let last_used_slot = self.last_used_idx & (self.queue_size - 1);
        let index;
        let len;
        // Safe because self.used points to a valid, aligned, initialised, dereferenceable, readable
        // instance of UsedRing.
        unsafe {
            index = (*self.used.as_ptr()).ring[last_used_slot as usize].id as u16;
            len = (*self.used.as_ptr()).ring[last_used_slot as usize].len;
        }

        if index != token {
            // The device used a different descriptor chain to the one we were expecting.
            return Err(Error::WrongToken);
        }

        self.recycle_descriptors(index);
        self.last_used_idx = self.last_used_idx.wrapping_add(1);

        Ok(len)
    }

    /// Returns the size of the queue.
    pub fn size(&self) -> u16 {
        self.queue_size
    }
}

/// The inner layout of a VirtQueue.
///
/// Ref: 2.6.2 Legacy Interfaces: A Note on Virtqueue Layout
struct VirtQueueLayout {
    avail_offset: usize,
    used_offset: usize,
    size: usize,
}

impl VirtQueueLayout {
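    /// Computes the layout for a queue of the given (power-of-two) size.
    ///
    /// For example, with `queue_size = 4` and 16-byte descriptors, `desc` is 64 bytes, `avail`
    /// is 14 bytes and `used` is 38 bytes; assuming `align_up` rounds up to the 4 KiB
    /// `PAGE_SIZE`, that gives `avail_offset = 64`, `used_offset = 4096` and `size = 8192`.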
    fn new(queue_size: u16) -> Self {
        assert!(
            queue_size.is_power_of_two(),
            "queue size should be a power of 2"
        );
        let queue_size = queue_size as usize;
        let desc = size_of::<Descriptor>() * queue_size;
        let avail = size_of::<u16>() * (3 + queue_size);
        let used = size_of::<u16>() * 3 + size_of::<UsedElem>() * queue_size;
        VirtQueueLayout {
            avail_offset: desc,
            used_offset: align_up(desc + avail),
            size: align_up(desc + avail) + align_up(used),
        }
    }
}

#[repr(C, align(16))]
#[derive(Debug)]
pub(crate) struct Descriptor {
    addr: u64,
    len: u32,
    flags: DescFlags,
    next: u16,
}

impl Descriptor {
    /// Sets the buffer address, length and flags, and shares it with the device.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the buffer lives at least as long as the descriptor is active.
    unsafe fn set_buf<H: Hal>(
        &mut self,
        buf: NonNull<[u8]>,
        direction: BufferDirection,
        extra_flags: DescFlags,
    ) {
        self.addr = H::share(buf, direction) as u64;
        self.len = buf.len() as u32;
        self.flags = extra_flags
            | match direction {
                BufferDirection::DeviceToDriver => DescFlags::WRITE,
                BufferDirection::DriverToDevice => DescFlags::empty(),
            };
    }

    /// Returns the index of the next descriptor in the chain if the `NEXT` flag is set, or `None`
    /// if it is not (and thus this descriptor is the end of the chain).
    fn next(&self) -> Option<u16> {
        if self.flags.contains(DescFlags::NEXT) {
            Some(self.next)
        } else {
            None
        }
    }
}

bitflags! {
    /// Descriptor flags
    struct DescFlags: u16 {
        /// Marks a buffer as continuing via the `next` field.
        const NEXT = 1;
        /// Marks a buffer as device write-only (otherwise it is device read-only).
        const WRITE = 2;
        /// Means the buffer contains a list of buffer descriptors.
        const INDIRECT = 4;
    }
}

/// The driver uses the available ring to offer buffers to the device:
/// each ring entry refers to the head of a descriptor chain.
/// It is only written by the driver and read by the device.
#[repr(C)]
#[derive(Debug)]
struct AvailRing {
    flags: u16,
    /// A driver MUST NOT decrement the idx.
    idx: u16,
    ring: [u16; 32], // actual size: queue_size
    used_event: u16, // unused
}

/// The used ring is where the device returns buffers once it is done with them:
/// it is only written to by the device, and read by the driver.
#[repr(C)]
#[derive(Debug)]
struct UsedRing {
    flags: u16,
    idx: u16,
    ring: [UsedElem; 32], // actual size: queue_size
    avail_event: u16, // unused
}

#[repr(C)]
#[derive(Debug)]
struct UsedElem {
    /// Index of the head of the used descriptor chain.
    id: u32,
    /// The number of bytes written into the device-writable portion of the buffer chain.
    len: u32,
}

/// Simulates the device writing to a VirtIO queue, for use in tests.
///
/// The fake device always uses descriptors in order.
#[cfg(test)]
pub(crate) fn fake_write_to_queue(
    queue_size: u16,
    receive_queue_descriptors: *const Descriptor,
    receive_queue_driver_area: VirtAddr,
    receive_queue_device_area: VirtAddr,
    data: &[u8],
) {
    let descriptors = ptr::slice_from_raw_parts(receive_queue_descriptors, queue_size as usize);
    let available_ring = receive_queue_driver_area as *const AvailRing;
    let used_ring = receive_queue_device_area as *mut UsedRing;
    // Safe because the various pointers are properly aligned, dereferenceable, initialised, and
    // nothing else accesses them during this block.
    unsafe {
        // Make sure there is actually at least one descriptor available to write to.
        assert_ne!((*available_ring).idx, (*used_ring).idx);
        // The fake device always uses descriptors in order, like VIRTIO_F_IN_ORDER, so
        // `used_ring.idx` marks the next descriptor we should take from the available ring.
        let next_slot = (*used_ring).idx & (queue_size - 1);
        let head_descriptor_index = (*available_ring).ring[next_slot as usize];
        let mut descriptor = &(*descriptors)[head_descriptor_index as usize];

        // Loop through all descriptors in the chain, writing data to them.
        let mut remaining_data = data;
        loop {
            // Check the buffer and write to it.
            let flags = descriptor.flags;
            assert!(flags.contains(DescFlags::WRITE));
            let buffer_length = descriptor.len as usize;
            let length_to_write = min(remaining_data.len(), buffer_length);
            ptr::copy(
                remaining_data.as_ptr(),
                descriptor.addr as *mut u8,
                length_to_write,
            );
            remaining_data = &remaining_data[length_to_write..];

            if let Some(next) = descriptor.next() {
                descriptor = &(*descriptors)[next as usize];
            } else {
                assert_eq!(remaining_data.len(), 0);
                break;
            }
        }

        // Mark the buffer as used.
        (*used_ring).ring[next_slot as usize].id = head_descriptor_index as u32;
        (*used_ring).ring[next_slot as usize].len = data.len() as u32;
        (*used_ring).idx += 1;
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        hal::fake::FakeHal,
        transport::mmio::{MmioTransport, VirtIOHeader, MODERN_VERSION},
    };
    use core::ptr::NonNull;

    #[test]
    fn invalid_queue_size() {
        let mut header = VirtIOHeader::make_fake_header(MODERN_VERSION, 1, 0, 0, 4);
        let mut transport = unsafe { MmioTransport::new(NonNull::from(&mut header)) }.unwrap();
        // Size not a power of 2.
        assert_eq!(
            VirtQueue::<FakeHal>::new(&mut transport, 0, 3).unwrap_err(),
            Error::InvalidParam
        );
    }

    #[test]
    fn queue_too_big() {
        let mut header = VirtIOHeader::make_fake_header(MODERN_VERSION, 1, 0, 0, 4);
        let mut transport = unsafe { MmioTransport::new(NonNull::from(&mut header)) }.unwrap();
        assert_eq!(
            VirtQueue::<FakeHal>::new(&mut transport, 0, 5).unwrap_err(),
            Error::InvalidParam
        );
    }

    #[test]
    fn queue_already_used() {
        let mut header = VirtIOHeader::make_fake_header(MODERN_VERSION, 1, 0, 0, 4);
        let mut transport = unsafe { MmioTransport::new(NonNull::from(&mut header)) }.unwrap();
        VirtQueue::<FakeHal>::new(&mut transport, 0, 4).unwrap();
        assert_eq!(
            VirtQueue::<FakeHal>::new(&mut transport, 0, 4).unwrap_err(),
            Error::AlreadyUsed
        );
    }

    #[test]
    fn add_empty() {
        let mut header = VirtIOHeader::make_fake_header(MODERN_VERSION, 1, 0, 0, 4);
        let mut transport = unsafe { MmioTransport::new(NonNull::from(&mut header)) }.unwrap();
        let mut queue = VirtQueue::<FakeHal>::new(&mut transport, 0, 4).unwrap();
        assert_eq!(
            unsafe { queue.add(&[], &[]) }.unwrap_err(),
            Error::InvalidParam
        );
    }

    #[test]
    fn add_too_many() {
        let mut header = VirtIOHeader::make_fake_header(MODERN_VERSION, 1, 0, 0, 4);
        let mut transport = unsafe { MmioTransport::new(NonNull::from(&mut header)) }.unwrap();
        let mut queue = VirtQueue::<FakeHal>::new(&mut transport, 0, 4).unwrap();
        assert_eq!(queue.available_desc(), 4);
        assert_eq!(
            unsafe { queue.add(&[&[], &[], &[]], &[&mut [], &mut []]) }.unwrap_err(),
            Error::QueueFull
        );
    }

    #[test]
    fn add_buffers() {
        let mut header = VirtIOHeader::make_fake_header(MODERN_VERSION, 1, 0, 0, 4);
        let mut transport = unsafe { MmioTransport::new(NonNull::from(&mut header)) }.unwrap();
        let mut queue = VirtQueue::<FakeHal>::new(&mut transport, 0, 4).unwrap();
        assert_eq!(queue.size(), 4);
        assert_eq!(queue.available_desc(), 4);

        // Add a buffer chain consisting of two device-readable parts followed by two
        // device-writable parts.
        let token = unsafe { queue.add(&[&[1, 2], &[3]], &[&mut [0, 0], &mut [0]]) }.unwrap();

        assert_eq!(queue.available_desc(), 0);
        assert!(!queue.can_pop());

        // Safe because the various parts of the queue are properly aligned, dereferenceable and
        // initialised, and nothing else is accessing them at the same time.
        unsafe {
            let first_descriptor_index = (*queue.avail.as_ptr()).ring[0];
            assert_eq!(first_descriptor_index, token);
            assert_eq!(
                (*queue.desc.as_ptr())[first_descriptor_index as usize].len,
                2
            );
            assert_eq!(
                (*queue.desc.as_ptr())[first_descriptor_index as usize].flags,
                DescFlags::NEXT
            );
            let second_descriptor_index =
                (*queue.desc.as_ptr())[first_descriptor_index as usize].next;
            assert_eq!(
                (*queue.desc.as_ptr())[second_descriptor_index as usize].len,
                1
            );
            assert_eq!(
                (*queue.desc.as_ptr())[second_descriptor_index as usize].flags,
                DescFlags::NEXT
            );
            let third_descriptor_index =
                (*queue.desc.as_ptr())[second_descriptor_index as usize].next;
            assert_eq!(
                (*queue.desc.as_ptr())[third_descriptor_index as usize].len,
                2
            );
            assert_eq!(
                (*queue.desc.as_ptr())[third_descriptor_index as usize].flags,
                DescFlags::NEXT | DescFlags::WRITE
            );
            let fourth_descriptor_index =
                (*queue.desc.as_ptr())[third_descriptor_index as usize].next;
            assert_eq!(
                (*queue.desc.as_ptr())[fourth_descriptor_index as usize].len,
                1
            );
            assert_eq!(
                (*queue.desc.as_ptr())[fourth_descriptor_index as usize].flags,
                DescFlags::WRITE
            );
        }
    }
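
    // An end-to-end sketch driving `add`, `fake_write_to_queue` and `pop_used` together, the
    // way a receive queue would be used. It only checks the token and the used length, since
    // where the written bytes end up depends on how `FakeHal::share` maps the buffer.
    #[test]
    fn add_fake_device_write_pop() {
        let mut header = VirtIOHeader::make_fake_header(MODERN_VERSION, 1, 0, 0, 4);
        let mut transport = unsafe { MmioTransport::new(NonNull::from(&mut header)) }.unwrap();
        let mut queue = VirtQueue::<FakeHal>::new(&mut transport, 0, 4).unwrap();

        // Add a single device-writable buffer, as a receive queue would.
        let mut buffer = [0u8; 4];
        let token = unsafe { queue.add(&[], &[&mut buffer]) }.unwrap();
        assert!(!queue.can_pop());

        // Simulate the device filling the buffer and publishing it on the used ring.
        fake_write_to_queue(
            4,
            queue.desc.as_ptr() as *const Descriptor,
            queue.avail.as_ptr() as VirtAddr,
            queue.used.as_ptr() as VirtAddr,
            &[7, 8, 9, 10],
        );

        assert!(queue.can_pop());
        assert_eq!(queue.peek_used(), Some(token));
        assert_eq!(queue.pop_used(token).unwrap(), 4);
        assert_eq!(queue.available_desc(), 4);
    }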
}

/// Returns an iterator over the buffers of `inputs` followed by the buffers of `outputs`, each
/// paired with the corresponding `BufferDirection`.
///
/// Panics if any of the buffer pointers is null.
fn input_output_iter<'a>(
    inputs: &'a [*const [u8]],
    outputs: &'a [*mut [u8]],
) -> impl Iterator<Item = (NonNull<[u8]>, BufferDirection)> + 'a {
    inputs
        .iter()
        .map(|input| {
            (
                NonNull::new(*input as *mut [u8]).unwrap(),
                BufferDirection::DriverToDevice,
            )
        })
        .chain(outputs.iter().map(|output| {
            (
                NonNull::new(*output).unwrap(),
                BufferDirection::DeviceToDriver,
            )
        }))
}