Browse Source

init: move virtio-blk from rCore

Runji Wang 5 years ago
commit
3c9b8265a9
6 changed files with 816 additions and 0 deletions
  1. 2 0
      .gitignore
  2. 13 0
      Cargo.toml
  3. 189 0
      src/blk.rs
  4. 266 0
      src/header.rs
  5. 79 0
      src/lib.rs
  6. 267 0
      src/queue.rs

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+/target
+Cargo.lock

+ 13 - 0
Cargo.toml

@@ -0,0 +1,13 @@
+[package]
+name = "virtio-drivers"
+version = "0.1.0"
+authors = ["Jiajie Chen <[email protected]>", "Runji Wang <[email protected]>"]
+edition = "2018"
+description = "VirtIO guest drivers."
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+volatile = "0.2"
+log = "0.4"
+bitflags = "1.2"

+ 189 - 0
src/blk.rs

@@ -0,0 +1,189 @@
+use core::mem::size_of;
+
+use super::*;
+use crate::header::VirtIOHeader;
+use crate::queue::VirtQueue;
+use bitflags::_core::sync::atomic::spin_loop_hint;
+use bitflags::*;
+use log::*;
+use volatile::Volatile;
+
/// The virtio block device is a simple virtual block device (ie. disk).
///
/// Read and write requests (and other exotic requests) are placed in the queue,
/// and serviced (probably out of order) by the device except where noted.
pub struct VirtIOBlk<'a> {
    /// MMIO register block of the device (legacy interface).
    header: &'static mut VirtIOHeader,
    /// The single request virtqueue (queue index 0).
    queue: VirtQueue<'a>,
    /// Device capacity in 512-byte sectors, read from config space at init.
    capacity: usize,
}
+
impl VirtIOBlk<'_> {
    /// Create a new VirtIO-Blk driver.
    ///
    /// Performs legacy device initialization: negotiates features (no optional
    /// features are accepted), reads the capacity from config space, and sets
    /// up virtqueue 0 with 16 descriptors.
    pub fn new(header: &'static mut VirtIOHeader) -> Result<Self> {
        header.begin_init(|features| {
            let features = BlkFeature::from_bits_truncate(features);
            info!("device features: {:?}", features);
            // negotiate these flags only
            let supported_features = BlkFeature::empty();
            (features & supported_features).bits()
        });

        // read configuration space
        let config = unsafe { &mut *(header.config_space() as *mut BlkConfig) };
        info!("config: {:?}", config);
        info!(
            "found a block device of size {}KB",
            // capacity is in 512-byte sectors, so 2 sectors == 1 KB
            config.capacity.read() / 2
        );

        let queue = VirtQueue::new(header, 0, 16)?;
        header.finish_init();

        Ok(VirtIOBlk {
            header,
            queue,
            capacity: config.capacity.read() as usize,
        })
    }

    /// Acknowledge interrupt.
    ///
    /// Returns `true` if there was a pending interrupt to acknowledge.
    pub fn ack_interrupt(&mut self) -> bool {
        self.header.ack_interrupt()
    }

    /// Read a block.
    ///
    /// `block_id` is the 512-byte sector index; `buf` must be exactly
    /// `BLK_SIZE` bytes. Busy-waits (spin) until the device services the
    /// request.
    ///
    /// # Panics
    ///
    /// Panics if `buf` is not `BLK_SIZE` bytes long, or if the device reports
    /// any status other than `Ok`.
    pub fn read_block(&mut self, block_id: usize, buf: &mut [u8]) -> Result {
        assert_eq!(buf.len(), BLK_SIZE);
        let req = BlkReq {
            type_: ReqType::In,
            reserved: 0,
            sector: block_id as u64,
        };
        let mut resp = BlkResp::default();
        // Device-readable buffers first (the request header), then
        // device-writable ones (the data buffer and the status byte).
        self.queue.add(&[req.as_buf()], &[buf, resp.as_buf_mut()])?;
        self.header.notify(0);
        while !self.queue.can_get() {
            spin_loop_hint();
        }
        self.queue.get()?;
        match resp.status {
            RespStatus::Ok => Ok(()),
            _ => panic!("{:?}", resp.status),
        }
    }

    /// Write a block.
    ///
    /// `block_id` is the 512-byte sector index; `buf` must be exactly
    /// `BLK_SIZE` bytes. Busy-waits (spin) until the device services the
    /// request.
    ///
    /// # Panics
    ///
    /// Panics if `buf` is not `BLK_SIZE` bytes long, or if the device reports
    /// any status other than `Ok`.
    pub fn write_block(&mut self, block_id: usize, buf: &[u8]) -> Result {
        assert_eq!(buf.len(), BLK_SIZE);
        let req = BlkReq {
            type_: ReqType::Out,
            reserved: 0,
            sector: block_id as u64,
        };
        let mut resp = BlkResp::default();
        // For a write, the data buffer is device-readable; only the status
        // byte is device-writable.
        self.queue.add(&[req.as_buf(), buf], &[resp.as_buf_mut()])?;
        self.header.notify(0);
        while !self.queue.can_get() {
            spin_loop_hint();
        }
        self.queue.get()?;
        match resp.status {
            RespStatus::Ok => Ok(()),
            _ => panic!("{:?}", resp.status),
        }
    }
}
+
#[repr(C)]
#[derive(Debug)]
/// Device-specific configuration space of a virtio-blk device.
struct BlkConfig {
    /// Number of 512 Bytes sectors
    capacity: Volatile<u64>,
    // ... ignored
    // NOTE(review): the remaining config fields (size_max, seg_max, geometry,
    // ...) are not mapped; this driver only reads `capacity`.
}
+
#[repr(C)]
#[derive(Debug)]
/// Request header placed at the front of every virtio-blk request.
struct BlkReq {
    /// Request kind (read / write).
    type_: ReqType,
    /// Must be zero.
    reserved: u32,
    /// Starting sector (512-byte units).
    sector: u64,
}
+
#[repr(C)]
#[derive(Debug)]
/// Response footer written by the device after servicing a request.
struct BlkResp {
    /// Completion status written by the device.
    status: RespStatus,
}
+
#[repr(u32)]
#[derive(Debug)]
/// Request type codes; only read and write are issued by this driver.
enum ReqType {
    /// Read from device into the buffer.
    In = 0,
    /// Write the buffer out to the device.
    Out = 1,
}
+
#[repr(u32)]
#[derive(Debug, Eq, PartialEq)]
/// Status values a device may write into `BlkResp::status`.
enum RespStatus {
    /// Request completed successfully.
    Ok = 0,
    /// Device or driver error.
    IoErr = 1,
    /// Request unsupported by the device.
    Unsupported = 2,
}
+
impl Default for BlkResp {
    fn default() -> Self {
        BlkResp {
            // Sentinel: if the device never writes the status byte, the
            // response must not accidentally read as `Ok`.
            status: RespStatus::Unsupported,
        }
    }
}
+
+const BLK_SIZE: usize = 512;
+
bitflags! {
    /// Feature bits a virtio-blk device may offer, plus the
    /// device-independent feature bits (24 and up).
    struct BlkFeature: u64 {
        const BARRIER       = 1 << 0;
        const SIZE_MAX      = 1 << 1;
        const SEG_MAX       = 1 << 2;
        const GEOMETRY      = 1 << 4;
        const RO            = 1 << 5;
        const BLK_SIZE      = 1 << 6;
        const SCSI          = 1 << 7;
        const FLUSH         = 1 << 9;
        const TOPOLOGY      = 1 << 10;
        const CONFIG_WCE    = 1 << 11;
        const DISCARD       = 1 << 13;
        const WRITE_ZEROES  = 1 << 14;

        // device independent
        const NOTIFY_ON_EMPTY       = 1 << 24; // legacy
        const ANY_LAYOUT            = 1 << 27; // legacy
        const RING_INDIRECT_DESC    = 1 << 28;
        const RING_EVENT_IDX        = 1 << 29;
        const UNUSED                = 1 << 30; // legacy
        const VERSION_1             = 1 << 32; // detect legacy

        // the following since virtio v1.1
        const ACCESS_PLATFORM       = 1 << 33;
        const RING_PACKED           = 1 << 34;
        const IN_ORDER              = 1 << 35;
        const ORDER_PLATFORM        = 1 << 36;
        const SR_IOV                = 1 << 37;
        const NOTIFICATION_DATA     = 1 << 38;
    }
}
+
/// Convert a struct into buffer.
///
/// # Safety
///
/// Implementors must be plain-old-data types (`#[repr(C)]`, no invalid byte
/// patterns, no pointers/references inside) since their raw bytes are exposed
/// directly as byte slices that the device may read or write.
unsafe trait AsBuf: Sized {
    /// View `self` as a read-only byte slice of length `size_of::<Self>()`.
    fn as_buf(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self as *const _ as _, size_of::<Self>()) }
    }
    /// View `self` as a mutable byte slice the device may write into.
    fn as_buf_mut(&mut self) -> &mut [u8] {
        unsafe { core::slice::from_raw_parts_mut(self as *mut _ as _, size_of::<Self>()) }
    }
}

unsafe impl AsBuf for BlkReq {}
unsafe impl AsBuf for BlkResp {}

+ 266 - 0
src/header.rs

@@ -0,0 +1,266 @@
+use crate::PAGE_SIZE;
+use bitflags::*;
+use volatile::{ReadOnly, Volatile, WriteOnly};
+
/// MMIO Device Legacy Register Interface.
///
/// Ref: 4.2.4 Legacy interface
///
/// All fields are 32-bit registers; the byte offset of each register from the
/// base of the block is noted next to the field (computed from the `repr(C)`
/// layout).
#[repr(C)]
#[derive(Debug)]
pub struct VirtIOHeader {
    /// Magic value
    magic: ReadOnly<u32>, // 0x000

    /// Device version number
    ///
    /// Legacy device returns value 0x1.
    version: ReadOnly<u32>, // 0x004

    /// Virtio Subsystem Device ID
    device_id: ReadOnly<u32>, // 0x008

    /// Virtio Subsystem Vendor ID
    vendor_id: ReadOnly<u32>, // 0x00c

    /// Flags representing features the device supports
    device_features: ReadOnly<u32>, // 0x010

    /// Device (host) features word selection
    device_features_sel: WriteOnly<u32>, // 0x014

    /// Reserved
    __r1: [ReadOnly<u32>; 2], // 0x018 - 0x01c

    /// Flags representing device features understood and activated by the driver
    driver_features: WriteOnly<u32>, // 0x020

    /// Activated (guest) features word selection
    driver_features_sel: WriteOnly<u32>, // 0x024

    /// Guest page size
    ///
    /// The driver writes the guest page size in bytes to the register during
    /// initialization, before any queues are used. This value should be a
    /// power of 2 and is used by the device to calculate the Guest address
    /// of the first queue page (see QueuePFN).
    guest_page_size: WriteOnly<u32>, // 0x028

    /// Reserved
    __r2: ReadOnly<u32>, // 0x02c

    /// Virtual queue index
    ///
    /// Writing to this register selects the virtual queue that the following
    /// operations on the QueueNumMax, QueueNum, QueueAlign and QueuePFN
    /// registers apply to. The index number of the first queue is zero (0x0).
    queue_sel: WriteOnly<u32>, // 0x030

    /// Maximum virtual queue size
    ///
    /// Reading from the register returns the maximum size of the queue the
    /// device is ready to process or zero (0x0) if the queue is not available.
    /// This applies to the queue selected by writing to QueueSel and is
    /// allowed only when QueuePFN is set to zero (0x0), so when the queue is
    /// not actively used.
    queue_num_max: ReadOnly<u32>, // 0x034

    /// Virtual queue size
    ///
    /// Queue size is the number of elements in the queue. Writing to this
    /// register notifies the device what size of the queue the driver will use.
    /// This applies to the queue selected by writing to QueueSel.
    queue_num: WriteOnly<u32>, // 0x038

    /// Used Ring alignment in the virtual queue
    ///
    /// Writing to this register notifies the device about alignment boundary
    /// of the Used Ring in bytes. This value should be a power of 2 and
    /// applies to the queue selected by writing to QueueSel.
    queue_align: WriteOnly<u32>, // 0x03c

    /// Guest physical page number of the virtual queue
    ///
    /// Writing to this register notifies the device about location of the
    /// virtual queue in the Guest’s physical address space. This value is
    /// the index number of a page starting with the queue Descriptor Table.
    /// Value zero (0x0) means physical address zero (0x00000000) and is illegal.
    /// When the driver stops using the queue it writes zero (0x0) to this
    /// register. Reading from this register returns the currently used page
    /// number of the queue, therefore a value other than zero (0x0) means that
    /// the queue is in use. Both read and write accesses apply to the queue
    /// selected by writing to QueueSel.
    queue_pfn: Volatile<u32>, // 0x040

    /// new interface only
    queue_ready: Volatile<u32>, // 0x044

    /// Reserved
    __r3: [ReadOnly<u32>; 2], // 0x048 - 0x04c

    /// Queue notifier
    queue_notify: WriteOnly<u32>, // 0x050

    /// Reserved
    __r4: [ReadOnly<u32>; 3], // 0x054 - 0x05c

    /// Interrupt status
    interrupt_status: ReadOnly<u32>, // 0x060

    /// Interrupt acknowledge
    interrupt_ack: WriteOnly<u32>, // 0x064

    /// Reserved
    __r5: [ReadOnly<u32>; 2], // 0x068 - 0x06c

    /// Device status
    ///
    /// Reading from this register returns the current device status flags.
    /// Writing non-zero values to this register sets the status flags,
    /// indicating the OS/driver progress. Writing zero (0x0) to this register
    /// triggers a device reset. The device sets QueuePFN to zero (0x0) for
    /// all queues in the device. Also see 3.1 Device Initialization.
    status: Volatile<DeviceStatus>, // 0x070

    /// Reserved
    __r6: [ReadOnly<u32>; 3], // 0x074 - 0x07c

    // new interface only since here
    queue_desc_low: WriteOnly<u32>,  // 0x080
    queue_desc_high: WriteOnly<u32>, // 0x084

    /// Reserved
    __r7: [ReadOnly<u32>; 2], // 0x088 - 0x08c

    queue_avail_low: WriteOnly<u32>,  // 0x090
    queue_avail_high: WriteOnly<u32>, // 0x094

    /// Reserved
    __r8: [ReadOnly<u32>; 2], // 0x098 - 0x09c

    queue_used_low: WriteOnly<u32>,  // 0x0a0
    queue_used_high: WriteOnly<u32>, // 0x0a4

    /// Reserved
    __r9: [ReadOnly<u32>; 21], // 0x0a8 - 0x0f8

    config_generation: ReadOnly<u32>, // 0x0fc
}
+
+impl VirtIOHeader {
+    /// Verify a valid header.
+    pub fn verify(&self) -> bool {
+        self.magic.read() == 0x74726976 && self.version.read() == 1 && self.device_id.read() != 0
+    }
+
+    /// Begin initializing the device.
+    ///
+    /// Ref: virtio 3.1.1 Device Initialization
+    pub fn begin_init(&mut self, negotiate_features: impl FnOnce(u64) -> u64) {
+        self.status.write(DeviceStatus::ACKNOWLEDGE);
+        self.status.write(DeviceStatus::DRIVER);
+
+        let features = self.read_device_features();
+        self.write_driver_features(negotiate_features(features));
+        self.status.write(DeviceStatus::FEATURES_OK);
+
+        self.guest_page_size.write(PAGE_SIZE as u32);
+    }
+
+    /// Finish initializing the device.
+    pub fn finish_init(&mut self) {
+        self.status.write(DeviceStatus::DRIVER_OK);
+    }
+
+    /// Read device features.
+    fn read_device_features(&mut self) -> u64 {
+        self.device_features_sel.write(0); // device features [0, 32)
+        let mut device_features_bits = self.device_features.read().into();
+        self.device_features_sel.write(1); // device features [32, 64)
+        device_features_bits += (self.device_features.read() as u64) << 32;
+        device_features_bits
+    }
+
+    /// Write device features.
+    fn write_driver_features(&mut self, driver_features: u64) {
+        self.driver_features_sel.write(0); // driver features [0, 32)
+        self.driver_features.write(driver_features as u32);
+        self.driver_features_sel.write(1); // driver features [32, 64)
+        self.driver_features.write((driver_features >> 32) as u32);
+    }
+
+    /// Set queue.
+    pub fn queue_set(&mut self, queue: u32, size: u32, align: u32, pfn: u32) {
+        self.queue_sel.write(queue);
+        self.queue_num.write(size);
+        self.queue_align.write(align);
+        self.queue_pfn.write(pfn);
+    }
+
+    /// Get guest physical page number of the virtual queue.
+    pub fn queue_physical_page_number(&mut self, queue: u32) -> u32 {
+        self.queue_sel.write(queue);
+        self.queue_pfn.read()
+    }
+
+    /// Whether the queue is in used.
+    pub fn queue_used(&mut self, queue: u32) -> bool {
+        self.queue_physical_page_number(queue) != 0
+    }
+
+    /// Get the max size of queue.
+    pub fn max_queue_size(&self) -> u32 {
+        self.queue_num_max.read()
+    }
+
+    /// Notify device.
+    pub fn notify(&mut self, queue: u32) {
+        self.queue_notify.write(queue);
+    }
+
+    /// Acknowledge interrupt and return true if success.
+    pub fn ack_interrupt(&mut self) -> bool {
+        let interrupt = self.interrupt_status.read();
+        if interrupt != 0 {
+            self.interrupt_ack.write(interrupt);
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Get the pointer to config space (at offset 0x100)
+    pub fn config_space(&self) -> *mut u8 {
+        (self as *const _ as usize + CONFIG_SPACE_OFFSET) as _
+    }
+}
+
+bitflags! {
+    /// The device status field.
+    struct DeviceStatus: u32 {
+        /// Indicates that the guest OS has found the device and recognized it
+        /// as a valid virtio device.
+        const ACKNOWLEDGE = 1;
+
+        /// Indicates that the guest OS knows how to drive the device.
+        const DRIVER = 2;
+
+        /// Indicates that something went wrong in the guest, and it has given
+        /// up on the device. This could be an internal error, or the driver
+        /// didn’t like the device for some reason, or even a fatal error
+        /// during device operation.
+        const FAILED = 128;
+
+        /// Indicates that the driver has acknowledged all the features it
+        /// understands, and feature negotiation is complete.
+        const FEATURES_OK = 8;
+
+        /// Indicates that the driver is set up and ready to drive the device.
+        const DRIVER_OK = 4;
+
+        /// Indicates that the device has experienced an error from which it
+        /// can’t recover.
+        const DEVICE_NEEDS_RESET = 64;
+    }
+}
+
+pub const CONFIG_SPACE_OFFSET: usize = 0x100;

+ 79 - 0
src/lib.rs

@@ -0,0 +1,79 @@
+//! VirtIO guest drivers.
+
+#![no_std]
+#![deny(unused_must_use, missing_docs)]
+
+mod blk;
+mod header;
+mod queue;
+
+pub use self::blk::VirtIOBlk;
+
+const PAGE_SIZE: usize = 0x1000;
+
// A virtual address in the driver's (guest kernel's) address space.
type VirtAddr = usize;
// A guest-physical address, as seen by the device.
type PhysAddr = usize;
+
/// Allocate `pages` physically-contiguous pages for DMA and return their
/// (virtual, physical) base addresses.
///
/// NOTE(review): placeholder — presumably to be supplied by the embedding
/// kernel's HAL; currently always panics via `unimplemented!`.
fn alloc_dma(_pages: usize) -> Result<(VirtAddr, PhysAddr)> {
    unimplemented!()
}
+
/// Free `pages` pages of DMA memory starting at physical address `_paddr`.
///
/// NOTE(review): placeholder HAL hook; currently always panics via
/// `unimplemented!`.
fn dealloc_dma(_paddr: PhysAddr, _pages: usize) -> Result {
    unimplemented!()
}
+
/// Translate a physical address to a virtual address the driver can access.
///
/// NOTE(review): placeholder HAL hook; currently always panics via
/// `unimplemented!`.
fn phys_to_virt(_paddr: PhysAddr) -> VirtAddr {
    unimplemented!()
}
+
/// Translate a driver virtual address to the physical address given to the
/// device (used when filling descriptor `addr` fields).
///
/// NOTE(review): placeholder HAL hook; currently always panics via
/// `unimplemented!`.
fn virt_to_phys(_vaddr: VirtAddr) -> PhysAddr {
    unimplemented!()
}
+
/// The type returned by driver methods.
///
/// The success type defaults to `()` for operations with no return value.
pub type Result<T = ()> = core::result::Result<T, Error>;
+
+// pub struct Error {
+//     kind: ErrorKind,
+//     reason: &'static str,
+// }
+
/// The error type of VirtIO drivers.
#[derive(Debug, Eq, PartialEq)]
pub enum Error {
    /// The buffer is too small.
    // (also returned by the queue when there are not enough free descriptors)
    BufferTooSmall,
    /// The device is not ready.
    NotReady,
    /// The queue is already in use.
    AlreadyUsed,
    /// Invalid parameter.
    InvalidParam,
}
+
/// Types of virtio devices.
///
/// Values follow the Device ID table of the virtio specification (section 5);
/// note the spec defines both 5 (traditional memory ballooning) and 13
/// (memory balloon) as distinct device IDs.
enum DeviceType {
    Invalid = 0,
    Network = 1,
    Block = 2,
    Console = 3,
    EntropySource = 4,
    MemoryBallooning = 5,
    IoMemory = 6,
    Rpmsg = 7,
    ScsiHost = 8,
    _9P = 9,
    Mac80211 = 10,
    RprocSerial = 11,
    VirtioCAIF = 12,
    MemoryBalloon = 13,
    GPU = 16,
    Timer = 17,
    Input = 18,
    Socket = 19,
    Crypto = 20,
    SignalDistributionModule = 21,
    Pstore = 22,
    IOMMU = 23,
    Memory = 24,
}

+ 267 - 0
src/queue.rs

@@ -0,0 +1,267 @@
+use core::mem::size_of;
+use core::slice;
+use core::sync::atomic::{fence, Ordering};
+
+use super::*;
+use crate::header::VirtIOHeader;
+use bitflags::*;
+
+use volatile::Volatile;
+
/// The mechanism for bulk data transport on virtio devices.
///
/// Each device can have zero or more virtqueues.
#[repr(C)]
pub struct VirtQueue<'a> {
    /// Descriptor table
    desc: &'a mut [Descriptor],
    /// Available ring
    avail: &'a mut AvailRing,
    /// Used ring
    used: &'a mut UsedRing,
    /// Pages of DMA region
    pages: usize,

    /// The index of queue
    queue_idx: u32,
    /// The size of queue
    queue_size: u16,
    /// The number of used queues.
    // (i.e. the number of descriptors currently handed to the device)
    num_used: u16,
    /// The head desc index of the free list.
    free_head: u16,
    /// Driver's shadow of `avail.idx`: the next available-ring slot to fill.
    avail_idx: u16,
    /// The next used-ring index the driver will consume.
    last_used_idx: u16,
}
+
+impl VirtQueue<'_> {
+    /// Create a new VirtQueue.
+    pub fn new(header: &mut VirtIOHeader, idx: usize, size: u16) -> Result<Self> {
+        if header.queue_used(idx as u32) {
+            return Err(Error::AlreadyUsed);
+        }
+        if !size.is_power_of_two() || header.max_queue_size() < size as u32 {
+            return Err(Error::InvalidParam);
+        }
+        let layout = VirtQueueLayout::new(size);
+        let pages = layout.size / PAGE_SIZE;
+        // alloc continuous pages
+        let (vaddr, paddr) = alloc_dma(pages)?;
+
+        header.queue_set(
+            idx as u32,
+            size as u32,
+            PAGE_SIZE as u32,
+            (paddr as u32) >> 12,
+        );
+
+        let desc = unsafe { slice::from_raw_parts_mut(vaddr as *mut Descriptor, size as usize) };
+        let avail = unsafe { &mut *((vaddr + layout.avail_offset) as *mut AvailRing) };
+        let used = unsafe { &mut *((vaddr + layout.used_offset) as *mut UsedRing) };
+
+        // link descriptors together
+        for i in 0..(size - 1) {
+            desc[i as usize].next.write(i + 1);
+        }
+
+        Ok(VirtQueue {
+            desc,
+            avail,
+            used,
+            pages,
+            queue_size: size,
+            queue_idx: idx as u32,
+            num_used: 0,
+            free_head: 0,
+            avail_idx: 0,
+            last_used_idx: 0,
+        })
+    }
+
+    /// Add buffers to the virtqueue.
+    ///
+    /// Ref: linux virtio_ring.c virtqueue_add
+    pub fn add(&mut self, inputs: &[&[u8]], outputs: &[&mut [u8]]) -> Result {
+        if inputs.is_empty() && outputs.is_empty() {
+            return Ok(());
+        }
+        if inputs.len() + outputs.len() + self.num_used as usize > self.queue_size as usize {
+            return Err(Error::BufferTooSmall);
+        }
+
+        // allocate descriptors from free list
+        let head = self.free_head;
+        let mut last = self.free_head;
+        for input in inputs.iter() {
+            let desc = &mut self.desc[self.free_head as usize];
+            desc.set_buf(input);
+            desc.flags.write(DescFlags::NEXT);
+            last = self.free_head;
+            self.free_head = desc.next.read();
+        }
+        for output in outputs.iter() {
+            let desc = &mut self.desc[self.free_head as usize];
+            desc.set_buf(output);
+            desc.flags.write(DescFlags::NEXT | DescFlags::WRITE);
+            last = self.free_head;
+            self.free_head = desc.next.read();
+        }
+        // set last_elem.next = NULL
+        {
+            let desc = &mut self.desc[last as usize];
+            let mut flags = desc.flags.read();
+            flags.remove(DescFlags::NEXT);
+            desc.flags.write(flags);
+        }
+        self.num_used += (inputs.len() + outputs.len()) as u16;
+
+        let avail_slot = self.avail_idx & (self.queue_size - 1);
+        self.avail.ring[avail_slot as usize].write(head);
+
+        // write barrier
+        fence(Ordering::SeqCst);
+
+        // increase head of avail ring
+        self.avail_idx = self.avail_idx.wrapping_add(1);
+        self.avail.idx.write(self.avail_idx);
+        Ok(())
+    }
+
+    ///
+    pub fn can_get(&self) -> bool {
+        self.last_used_idx != self.used.idx.read()
+    }
+
+    /// Recycle descriptors in the list specified by head.
+    ///
+    /// This will push all linked descriptors at the front of the free list.
+    fn recycle_descriptors(&mut self, mut head: u16) {
+        let origin_free_head = self.free_head;
+        self.free_head = head;
+        loop {
+            let desc = &mut self.desc[head as usize];
+            let flags = desc.flags.read();
+            self.num_used -= 1;
+            if flags.contains(DescFlags::NEXT) {
+                head = desc.next.read();
+            } else {
+                desc.next.write(origin_free_head);
+                return;
+            }
+        }
+    }
+
+    /// Get device used buffers.
+    ///
+    /// Ref: linux virtio_ring.c virtqueue_get_buf_ctx
+    pub fn get(&mut self) -> Result<usize> {
+        if !self.can_get() {
+            return Err(Error::NotReady);
+        }
+        // read barrier
+        fence(Ordering::SeqCst);
+
+        let last_used_slot = self.last_used_idx & (self.queue_size - 1);
+        let index = self.used.ring[last_used_slot as usize].id.read();
+        let len = self.used.ring[last_used_slot as usize].len.read();
+
+        self.recycle_descriptors(index as u16);
+        self.last_used_idx = self.last_used_idx.wrapping_add(1);
+
+        Ok(len as usize)
+    }
+}
+
impl Drop for VirtQueue<'_> {
    /// Return the queue's DMA pages to the allocator.
    ///
    /// NOTE(review): QueuePFN is not cleared before the memory is freed —
    /// confirm callers reset/stop the device before dropping the queue.
    fn drop(&mut self) {
        dealloc_dma(virt_to_phys(self.desc.as_ptr() as _), self.pages).unwrap()
    }
}
+
/// The inner layout of a VirtQueue.
///
/// Ref: 2.6.2 Legacy Interfaces: A Note on Virtqueue Layout
struct VirtQueueLayout {
    /// Byte offset of the available ring from the start of the region.
    avail_offset: usize,
    /// Byte offset of the used ring (page-aligned) from the start.
    used_offset: usize,
    /// Total size in bytes of the DMA region, a multiple of the page size.
    size: usize,
}
+
impl VirtQueueLayout {
    /// Compute the legacy layout for a queue of `queue_size` entries:
    /// [descriptor table | avail ring] [padding to page] [used ring].
    fn new(queue_size: u16) -> Self {
        assert!(
            queue_size.is_power_of_two(),
            "queue size should be a power of 2"
        );
        let queue_size = queue_size as usize;
        let desc = size_of::<Descriptor>() * queue_size;
        // avail ring: flags + idx + ring[queue_size] + used_event, all u16
        let avail = size_of::<u16>() * (3 + queue_size);
        // used ring: flags + idx + ring[queue_size] + avail_event
        let used = size_of::<u16>() * 3 + size_of::<UsedElem>() * queue_size;
        VirtQueueLayout {
            avail_offset: desc,
            used_offset: align_up(desc + avail),
            size: align_up(desc + avail) + align_up(used),
        }
    }
}
+
#[repr(C, align(16))]
#[derive(Debug)]
/// A buffer descriptor in the descriptor table, shared with the device.
struct Descriptor {
    /// Guest-physical address of the buffer.
    addr: Volatile<u64>,
    /// Length of the buffer in bytes.
    len: Volatile<u32>,
    /// NEXT / WRITE / INDIRECT flags.
    flags: Volatile<DescFlags>,
    /// Index of the next descriptor when NEXT is set; also used as the
    /// free-list link while the descriptor is not handed to the device.
    next: Volatile<u16>,
}
+
impl Descriptor {
    /// Point this descriptor at `buf`, translating its virtual address to the
    /// physical address the device needs.
    fn set_buf(&mut self, buf: &[u8]) {
        self.addr.write(virt_to_phys(buf.as_ptr() as usize) as u64);
        self.len.write(buf.len() as u32);
    }
}
+
bitflags! {
    /// Descriptor flags
    struct DescFlags: u16 {
        /// The `next` field links to another descriptor in the chain.
        const NEXT = 1;
        /// The buffer is device-writable (otherwise device-readable).
        const WRITE = 2;
        /// The buffer contains a table of indirect descriptors.
        const INDIRECT = 4;
    }
}
+
/// The driver uses the available ring to offer buffers to the device:
/// each ring entry refers to the head of a descriptor chain.
/// It is only written by the driver and read by the device.
#[repr(C)]
#[derive(Debug)]
struct AvailRing {
    flags: Volatile<u16>,
    /// A driver MUST NOT decrement the idx.
    idx: Volatile<u16>,
    ring: [Volatile<u16>; 32], // actual size: queue_size
    // NOTE(review): with a fixed [u16; 32] array this field only sits at the
    // real used_event offset when queue_size == 32; it is unused here, but
    // do not rely on it for other queue sizes.
    used_event: Volatile<u16>, // unused
}
+
/// The used ring is where the device returns buffers once it is done with them:
/// it is only written to by the device, and read by the driver.
#[repr(C)]
#[derive(Debug)]
struct UsedRing {
    flags: Volatile<u16>,
    idx: Volatile<u16>,
    ring: [UsedElem; 32], // actual size: queue_size
    // NOTE(review): as with AvailRing::used_event, this only matches the real
    // avail_event offset when queue_size == 32; it is unused here.
    avail_event: Volatile<u16>, // unused
}
+
#[repr(C)]
#[derive(Debug)]
/// One entry of the used ring, written by the device.
struct UsedElem {
    /// Head index of the completed descriptor chain.
    id: Volatile<u32>,
    /// Number of bytes the device wrote into the chain's buffers.
    len: Volatile<u32>,
}
+
+/// Align `size` up to a page.
+fn align_up(size: usize) -> usize {
+    (size + PAGE_SIZE) & !(PAGE_SIZE - 1)
+}