// perf_buffer.rs
use std::{
    ffi::c_void,
    io, mem,
    os::unix::io::{AsRawFd, RawFd},
    ptr, slice,
    sync::atomic::{self, AtomicPtr, Ordering},
};

use bytes::BytesMut;
use libc::{c_int, close, munmap, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
use thiserror::Error;

use crate::{
    generated::{
        perf_event_header, perf_event_mmap_page,
        perf_event_type::{PERF_RECORD_LOST, PERF_RECORD_SAMPLE},
    },
    sys::{perf_event_ioctl, perf_event_open_bpf},
    PERF_EVENT_IOC_DISABLE, PERF_EVENT_IOC_ENABLE,
};
/// Perf buffer error.
#[derive(Error, Debug)]
pub enum PerfBufferError {
    /// The page count value passed to [`PerfEventArray::open`](crate::maps::PerfEventArray::open) is invalid.
    #[error("invalid page count {page_count}, the value must be a power of two")]
    InvalidPageCount {
        /// the page count
        page_count: usize,
    },
    /// `perf_event_open` failed.
    #[error("perf_event_open failed: {io_error}")]
    OpenError {
        /// the source of this error
        #[source]
        io_error: io::Error,
    },
    /// `mmap`-ping the buffer failed.
    #[error("mmap failed: {io_error}")]
    MMapError {
        /// the source of this error
        #[source]
        io_error: io::Error,
    },
    /// The `PERF_EVENT_IOC_ENABLE` ioctl failed.
    #[error("PERF_EVENT_IOC_ENABLE failed: {io_error}")]
    PerfEventEnableError {
        /// the source of this error
        #[source]
        io_error: io::Error,
    },
    /// `read_events()` was called with no output buffers.
    #[error("read_events() was called with no output buffers")]
    NoBuffers,
    /// `read_events()` was called with a buffer that is not large enough to
    /// contain the next event in the perf buffer.
    #[deprecated(
        since = "0.10.8",
        note = "read_events() now calls BytesMut::reserve() internally, so this error is never returned"
    )]
    #[error("the buffer needs to be of at least {size} bytes")]
    MoreSpaceNeeded {
        /// expected size
        size: usize,
    },
    /// An IO error occurred.
    #[error(transparent)]
    IOError(#[from] io::Error),
}
/// Return type of `read_events()`.
#[derive(Debug, PartialEq, Eq)]
pub struct Events {
    /// The number of events read.
    pub read: usize,
    /// The number of events lost.
    pub lost: usize,
}
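// Note on semantics (editor's gloss): `read` counts output buffers that were
// filled, so `buffers[..events.read]` each hold exactly one sample payload;
// `lost` sums the counts from PERF_RECORD_LOST records, i.e. samples the
// kernel dropped because the ring buffer was full.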
pub(crate) struct PerfBuffer {
    buf: AtomicPtr<perf_event_mmap_page>,
    size: usize,
    page_size: usize,
    fd: RawFd,
}

impl PerfBuffer {
    pub(crate) fn open(
        cpu_id: u32,
        page_size: usize,
        page_count: usize,
    ) -> Result<PerfBuffer, PerfBufferError> {
        if !page_count.is_power_of_two() {
            return Err(PerfBufferError::InvalidPageCount { page_count });
        }

        let fd = perf_event_open_bpf(cpu_id as i32)
            .map_err(|(_, io_error)| PerfBufferError::OpenError { io_error })?
            as RawFd;
        let size = page_size * page_count;
        // Map one extra page: the first page is the perf_event_mmap_page
        // metadata header, the remaining `size` bytes are the ring buffer.
        let buf = unsafe {
            mmap(
                ptr::null_mut(),
                size + page_size,
                PROT_READ | PROT_WRITE,
                MAP_SHARED,
                fd,
                0,
            )
        };
        if buf == MAP_FAILED {
            return Err(PerfBufferError::MMapError {
                io_error: io::Error::last_os_error(),
            });
        }

        let perf_buf = PerfBuffer {
            buf: AtomicPtr::new(buf as *mut perf_event_mmap_page),
            fd,
            size,
            page_size,
        };

        perf_event_ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)
            .map_err(|(_, io_error)| PerfBufferError::PerfEventEnableError { io_error })?;

        Ok(perf_buf)
    }
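
    // A minimal usage sketch (hypothetical caller; in practice this is driven
    // through `PerfEventArray::open`), assuming the page size is obtained via
    // sysconf:
    //
    //     let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as usize;
    //     let buf = PerfBuffer::open(0, page_size, 2)?; // page_count: power of two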
    pub(crate) fn readable(&self) -> bool {
        let header = self.buf.load(Ordering::SeqCst);
        let head = unsafe { (*header).data_head } as usize;
        let tail = unsafe { (*header).data_tail } as usize;
        head != tail
    }
    pub(crate) fn read_events(
        &mut self,
        buffers: &mut [BytesMut],
    ) -> Result<Events, PerfBufferError> {
        if buffers.is_empty() {
            return Err(PerfBufferError::NoBuffers);
        }
        let header = self.buf.load(Ordering::SeqCst);
        // The ring buffer data area starts right after the metadata page.
        let base = header as usize + self.page_size;

        let mut events = Events { read: 0, lost: 0 };
        let mut buf_n = 0;

        // Copy `out_buf.len()` bytes out of the ring buffer starting at
        // `start_off`, splitting the copy in two when it wraps around the
        // end of the mapping.
        let fill_buf = |start_off, base, mmap_size, out_buf: &mut [u8]| {
            let len = out_buf.len();

            let end = (start_off + len) % mmap_size;
            let start = start_off % mmap_size;

            if start < end {
                out_buf.copy_from_slice(unsafe {
                    slice::from_raw_parts((base + start) as *const u8, len)
                });
            } else {
                let size = mmap_size - start;
                unsafe {
                    out_buf[..size]
                        .copy_from_slice(slice::from_raw_parts((base + start) as *const u8, size));
                    out_buf[size..]
                        .copy_from_slice(slice::from_raw_parts(base as *const u8, len - size));
                }
            }
        };
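
        // Worked example of the wrapping branch above: with mmap_size = 4096,
        // start_off = 4090 and len = 16, start = 4090 and end = 10, so the
        // copy is split into 6 bytes from offsets 4090..4096 followed by the
        // remaining 10 bytes from offset 0.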
        let read_event = |event_start, event_type, base, buf: &mut BytesMut| {
            let sample_size = match event_type {
                x if x == PERF_RECORD_SAMPLE as u32 || x == PERF_RECORD_LOST as u32 => {
                    let mut size = [0u8; mem::size_of::<u32>()];
                    fill_buf(
                        event_start + mem::size_of::<perf_event_header>(),
                        base,
                        self.size,
                        &mut size,
                    );
                    u32::from_ne_bytes(size)
                }
                _ => return Ok(None),
            } as usize;

            let sample_start =
                (event_start + mem::size_of::<perf_event_header>() + mem::size_of::<u32>())
                    % self.size;

            match event_type {
                x if x == PERF_RECORD_SAMPLE as u32 => {
                    buf.clear();
                    buf.reserve(sample_size);
                    unsafe { buf.set_len(sample_size) };

                    fill_buf(sample_start, base, self.size, buf);

                    Ok(Some((1, 0)))
                }
                x if x == PERF_RECORD_LOST as u32 => {
                    let mut count = [0u8; mem::size_of::<u64>()];
                    fill_buf(
                        event_start + mem::size_of::<perf_event_header>() + mem::size_of::<u64>(),
                        base,
                        self.size,
                        &mut count,
                    );
                    Ok(Some((0, u64::from_ne_bytes(count) as usize)))
                }
                _ => Ok(None),
            }
        };
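
        // The offsets used above follow the kernel's record layouts, mirrored
        // by the `Sample` and `LostSamples` structs at the bottom of this
        // file: a sample is `{ perf_event_header, u32 size, data[size] }`,
        // while a lost record is `{ perf_event_header, u64 id, u64 count }`,
        // which is why the count is read `size_of::<u64>()` bytes past the
        // header, skipping `id`.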
        let head = unsafe { (*header).data_head } as usize;
        let mut tail = unsafe { (*header).data_tail } as usize;
        while head != tail {
            if buf_n == buffers.len() {
                break;
            }

            let buf = &mut buffers[buf_n];

            let event_start = tail % self.size;
            let event =
                unsafe { ptr::read_unaligned((base + event_start) as *const perf_event_header) };
            let event_size = event.size as usize;

            match read_event(event_start, event.type_, base, buf) {
                Ok(Some((read, lost))) => {
                    if read > 0 {
                        buf_n += 1;
                        events.read += read;
                    }
                    events.lost += lost;
                }
                Ok(None) => { /* skip unknown event type */ }
                Err(e) => {
                    // An event failed to read: commit the tail for the events
                    // consumed so far, then propagate the error and give the
                    // caller a chance to retry, e.g. with more buffers.
                    atomic::fence(Ordering::SeqCst);
                    unsafe { (*header).data_tail = tail as u64 };
                    return Err(e);
                }
            }

            tail += event_size;
        }

        // Release the consumed records back to the kernel: the fence ensures
        // all our reads complete before data_tail is updated.
        atomic::fence(Ordering::SeqCst);
        unsafe { (*header).data_tail = tail as u64 };

        Ok(events)
    }
}
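
// End-to-end sketch of the read loop (hypothetical caller, assuming some
// readiness mechanism such as poll(2) on the buffer's `as_raw_fd()`):
//
//     let mut buffers = (0..16).map(|_| BytesMut::with_capacity(1024)).collect::<Vec<_>>();
//     // ...wait for the fd to become readable...
//     let events = perf_buffer.read_events(&mut buffers)?;
//     for buf in &buffers[..events.read] {
//         // each buf holds one sample written by the BPF program
//     }
//     if events.lost > 0 { /* the ring was full at some point */ }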
impl AsRawFd for PerfBuffer {
    fn as_raw_fd(&self) -> RawFd {
        self.fd
    }
}

impl Drop for PerfBuffer {
    fn drop(&mut self) {
        unsafe {
            let _ = perf_event_ioctl(self.fd, PERF_EVENT_IOC_DISABLE, 0);
            munmap(
                self.buf.load(Ordering::SeqCst) as *mut c_void,
                self.size + self.page_size,
            );
            close(self.fd);
        }
    }
}
// Wrapper around libc::mmap so that tests can substitute a fake mapping via
// TEST_MMAP_RET instead of calling into the kernel.
#[cfg_attr(test, allow(unused_variables))]
unsafe fn mmap(
    addr: *mut c_void,
    len: usize,
    prot: c_int,
    flags: c_int,
    fd: i32,
    offset: libc::off_t,
) -> *mut c_void {
    #[cfg(not(test))]
    return libc::mmap(addr, len, prot, flags, fd, offset);

    #[cfg(test)]
    use crate::sys::TEST_MMAP_RET;

    #[cfg(test)]
    TEST_MMAP_RET.with(|ret| *ret.borrow())
}
// In-memory layouts of the PERF_RECORD_SAMPLE and PERF_RECORD_LOST records,
// used by the tests below.
#[derive(Debug)]
#[repr(C)]
struct Sample {
    header: perf_event_header,
    pub size: u32,
}

#[repr(C)]
#[derive(Debug)]
struct LostSamples {
    header: perf_event_header,
    pub id: u64,
    pub count: u64,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        generated::perf_event_mmap_page,
        sys::{override_syscall, Syscall, TEST_MMAP_RET},
    };
    use std::{convert::TryInto, fmt::Debug, mem};

    const PAGE_SIZE: usize = 4096;
    union MMappedBuf {
        mmap_page: perf_event_mmap_page,
        data: [u8; PAGE_SIZE * 2],
    }

    // Stub out the syscalls and point the test mmap at a local buffer:
    // page 0 acts as the perf_event_mmap_page, page 1 as the ring buffer.
    fn fake_mmap(buf: &mut MMappedBuf) {
        override_syscall(|call| match call {
            Syscall::PerfEventOpen { .. } | Syscall::PerfEventIoctl { .. } => Ok(42),
            _ => panic!(),
        });
        TEST_MMAP_RET.with(|ret| *ret.borrow_mut() = buf as *const _ as *mut _);
    }
    #[test]
    fn test_invalid_page_count() {
        assert!(matches!(
            PerfBuffer::open(1, PAGE_SIZE, 0),
            Err(PerfBufferError::InvalidPageCount { .. })
        ));
        assert!(matches!(
            PerfBuffer::open(1, PAGE_SIZE, 3),
            Err(PerfBufferError::InvalidPageCount { .. })
        ));
        assert!(matches!(
            PerfBuffer::open(1, PAGE_SIZE, 5),
            Err(PerfBufferError::InvalidPageCount { .. })
        ));
    }
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_no_out_bufs() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();

        assert!(matches!(
            buf.read_events(&mut []),
            Err(PerfBufferError::NoBuffers)
        ))
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_no_events() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();

        let out_buf = BytesMut::with_capacity(4);
        assert_eq!(
            buf.read_events(&mut [out_buf]).unwrap(),
            Events { read: 0, lost: 0 }
        );
    }
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_read_first_lost() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let evt = LostSamples {
            header: perf_event_header {
                type_: PERF_RECORD_LOST as u32,
                misc: 0,
                size: mem::size_of::<LostSamples>() as u16,
            },
            id: 1,
            count: 0xCAFEBABE,
        };
        write(&mut mmapped_buf, 0, evt);

        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();
        let out_buf = BytesMut::with_capacity(0);
        let events = buf.read_events(&mut [out_buf]).unwrap();
        assert_eq!(events.lost, 0xCAFEBABE);
    }
    #[repr(C)]
    #[derive(Debug)]
    struct PerfSample<T: Debug> {
        s_hdr: Sample,
        value: T,
    }

    // Write `value` into the ring buffer data page at `offset` and advance
    // data_head past it, returning the new head offset.
    fn write<T: Debug>(mmapped_buf: &mut MMappedBuf, offset: usize, value: T) -> usize {
        let dst = (mmapped_buf as *const _ as usize + PAGE_SIZE + offset) as *const PerfSample<T>
            as *mut T;
        unsafe {
            ptr::write_unaligned(dst, value);
            mmapped_buf.mmap_page.data_head = (offset + mem::size_of::<T>()) as u64;
            mmapped_buf.mmap_page.data_head as usize
        }
    }

    fn write_sample<T: Debug>(mmapped_buf: &mut MMappedBuf, offset: usize, value: T) -> usize {
        let sample = PerfSample {
            s_hdr: Sample {
                header: perf_event_header {
                    type_: PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    size: mem::size_of::<PerfSample<T>>() as u16,
                },
                size: mem::size_of::<T>() as u32,
            },
            value,
        };
        write(mmapped_buf, offset, sample)
    }

    fn u32_from_buf(buf: &[u8]) -> u32 {
        u32::from_ne_bytes(buf[..4].try_into().unwrap())
    }

    fn u64_from_buf(buf: &[u8]) -> u64 {
        u64::from_ne_bytes(buf[..8].try_into().unwrap())
    }
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_read_first_sample() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();
        write_sample(&mut mmapped_buf, 0, 0xCAFEBABEu32);

        let mut out_bufs = [BytesMut::with_capacity(4)];

        let events = buf.read_events(&mut out_bufs).unwrap();
        assert_eq!(events, Events { lost: 0, read: 1 });
        assert_eq!(u32_from_buf(&out_bufs[0]), 0xCAFEBABE);
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_read_many_with_many_reads() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();
        let next = write_sample(&mut mmapped_buf, 0, 0xCAFEBABEu32);
        write_sample(&mut mmapped_buf, next, 0xBADCAFEu32);

        let mut out_bufs = [BytesMut::with_capacity(4)];

        let events = buf.read_events(&mut out_bufs).unwrap();
        assert_eq!(events, Events { lost: 0, read: 1 });
        assert_eq!(u32_from_buf(&out_bufs[0]), 0xCAFEBABE);

        let events = buf.read_events(&mut out_bufs).unwrap();
        assert_eq!(events, Events { lost: 0, read: 1 });
        assert_eq!(u32_from_buf(&out_bufs[0]), 0xBADCAFE);
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_read_many_with_one_read() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();
        let next = write_sample(&mut mmapped_buf, 0, 0xCAFEBABEu32);
        write_sample(&mut mmapped_buf, next, 0xBADCAFEu32);

        let mut out_bufs = (0..3)
            .map(|_| BytesMut::with_capacity(4))
            .collect::<Vec<_>>();

        let events = buf.read_events(&mut out_bufs).unwrap();
        assert_eq!(events, Events { lost: 0, read: 2 });
        assert_eq!(u32_from_buf(&out_bufs[0]), 0xCAFEBABE);
        assert_eq!(u32_from_buf(&out_bufs[1]), 0xBADCAFE);
    }
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_read_last_sample() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();

        let offset = PAGE_SIZE - mem::size_of::<PerfSample<u32>>();
        mmapped_buf.mmap_page.data_tail = offset as u64;
        write_sample(&mut mmapped_buf, offset, 0xCAFEBABEu32);

        let mut out_bufs = [BytesMut::with_capacity(4)];

        let events = buf.read_events(&mut out_bufs).unwrap();
        assert_eq!(events, Events { lost: 0, read: 1 });
        assert_eq!(u32_from_buf(&out_bufs[0]), 0xCAFEBABE);
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_read_wrapping_sample_size() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();

        let header = perf_event_header {
            type_: PERF_RECORD_SAMPLE as u32,
            misc: 0,
            size: mem::size_of::<PerfSample<u64>>() as u16,
        };

        // Lay the record out so the u32 `size` field straddles the end of
        // the ring buffer: two bytes at the end, two at the start.
        let offset = PAGE_SIZE - mem::size_of::<perf_event_header>() - 2;
        mmapped_buf.mmap_page.data_tail = offset as u64;
        write(&mut mmapped_buf, offset, header);
        write(&mut mmapped_buf, PAGE_SIZE - 2, 0x0004u16);
        write(&mut mmapped_buf, 0, 0x0000u16);
        write(&mut mmapped_buf, 2, 0xBAADCAFEu32);

        let mut out_bufs = [BytesMut::with_capacity(8)];

        let events = buf.read_events(&mut out_bufs).unwrap();
        assert_eq!(events, Events { lost: 0, read: 1 });
        assert_eq!(u32_from_buf(&out_bufs[0]), 0xBAADCAFE);
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_read_wrapping_value() {
        let mut mmapped_buf = MMappedBuf {
            data: [0; PAGE_SIZE * 2],
        };
        fake_mmap(&mut mmapped_buf);
        let mut buf = PerfBuffer::open(1, PAGE_SIZE, 1).unwrap();

        let sample = PerfSample {
            s_hdr: Sample {
                header: perf_event_header {
                    type_: PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    size: mem::size_of::<PerfSample<u64>>() as u16,
                },
                size: mem::size_of::<u64>() as u32,
            },
            value: 0xCAFEBABEu32,
        };

        // The u64 payload wraps: its first half sits at the end of the ring
        // buffer, the second half at the start.
        let offset = PAGE_SIZE - mem::size_of::<PerfSample<u32>>();
        mmapped_buf.mmap_page.data_tail = offset as u64;
        write(&mut mmapped_buf, offset, sample);
        write(&mut mmapped_buf, 0, 0xBAADCAFEu32);

        let mut out_bufs = [BytesMut::with_capacity(8)];

        let events = buf.read_events(&mut out_bufs).unwrap();
        assert_eq!(events, Events { lost: 0, read: 1 });
        assert_eq!(u64_from_buf(&out_bufs[0]), 0xBAADCAFECAFEBABE);
    }
}