asm_parser.rs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. // SPDX-License-Identifier: (Apache-2.0 OR MIT)
  2. // Copyright 2017 Rich Lane <lanerl@gmail.com>
  3. // Rust-doc comments were left in the module, but it is no longer publicly exposed from the root
  4. // file of the crate. Do not expect to find those comments in the documentation of the crate.
  5. //! This module parses eBPF assembly language source code.
  6. use alloc::{
  7. string::{String, ToString},
  8. vec::Vec,
  9. };
  10. #[cfg(feature = "std")]
  11. use combine::EasyParser;
  12. use combine::{
  13. attempt, between, eof, many, many1, one_of, optional,
  14. parser::char::{alpha_num, char, digit, hex_digit, spaces, string},
  15. sep_by,
  16. stream::position::{self},
  17. ParseError, Parser, Stream,
  18. };
/// Operand of an instruction.
///
/// This is the raw syntactic form produced by the parser: the register numbers and integer
/// values it carries are stored as parsed, without any validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Operand {
    /// Register number, written `rN` in the source (e.g. `r3`).
    Register(i64),
    /// Jump offset or immediate, written as an optionally signed decimal or `0x`-prefixed
    /// hexadecimal literal.
    Integer(i64),
    /// Register number and offset, written `[rN]`, `[rN+off]` or `[rN-off]`. The offset is 0
    /// when it is omitted.
    Memory(i64, i64),
    /// Used for pattern matching.
    Nil,
}
  31. /// Parsed instruction.
  32. #[derive(Debug, PartialEq, Eq)]
  33. pub struct Instruction {
  34. /// Instruction name.
  35. pub name: String,
  36. /// Operands.
  37. pub operands: Vec<Operand>,
  38. }
  39. fn ident<I>() -> impl Parser<I, Output = String>
  40. where
  41. I: Stream<Token = char>,
  42. I::Error: ParseError<I::Token, I::Range, I::Position>,
  43. {
  44. many1(alpha_num())
  45. }
  46. fn integer<I>() -> impl Parser<I, Output = i64>
  47. where
  48. I: Stream<Token = char>,
  49. I::Error: ParseError<I::Token, I::Range, I::Position>,
  50. {
  51. let sign = optional(one_of("-+".chars())).map(|x| match x {
  52. Some('-') => -1,
  53. _ => 1,
  54. });
  55. let hex = string("0x")
  56. .with(many1(hex_digit()))
  57. .map(|x: String| u64::from_str_radix(&x, 16).unwrap() as i64);
  58. let dec = many1(digit()).map(|x: String| x.parse::<i64>().unwrap());
  59. (sign, attempt(hex).or(dec)).map(|(s, x)| s * x)
  60. }
  61. fn register<I>() -> impl Parser<I, Output = i64>
  62. where
  63. I: Stream<Token = char>,
  64. I::Error: ParseError<I::Token, I::Range, I::Position>,
  65. {
  66. char('r')
  67. .with(many1(digit()))
  68. .map(|x: String| x.parse::<i64>().unwrap())
  69. }
  70. fn operand<I>() -> impl Parser<I, Output = Operand>
  71. where
  72. I: Stream<Token = char>,
  73. I::Error: ParseError<I::Token, I::Range, I::Position>,
  74. {
  75. let register_operand = register().map(Operand::Register);
  76. let immediate = integer().map(Operand::Integer);
  77. let memory = between(char('['), char(']'), (register(), optional(integer())))
  78. .map(|t| Operand::Memory(t.0, t.1.unwrap_or(0)));
  79. register_operand.or(immediate).or(memory)
  80. }
  81. fn instruction<I>() -> impl Parser<I, Output = Instruction>
  82. where
  83. I: Stream<Token = char>,
  84. I::Error: ParseError<I::Token, I::Range, I::Position>,
  85. {
  86. let operands = sep_by(operand(), char(',').skip(spaces()));
  87. (ident().skip(spaces()), operands, spaces()).map(|t| Instruction {
  88. name: t.0,
  89. operands: t.1,
  90. })
  91. }
  92. /// Parse a string into a list of instructions.
  93. ///
  94. /// The instructions are not validated and may have invalid names and operand types.
  95. pub fn parse(input: &str) -> Result<Vec<Instruction>, String> {
  96. let mut with = spaces().with(many(instruction()).skip(eof()));
  97. #[cfg(feature = "std")]
  98. {
  99. match with.easy_parse(position::Stream::new(input)) {
  100. Ok((insts, _)) => Ok(insts),
  101. Err(err) => Err(err.to_string()),
  102. }
  103. }
  104. #[cfg(not(feature = "std"))]
  105. {
  106. match with.parse(position::Stream::new(input)) {
  107. Ok((insts, _)) => Ok(insts),
  108. Err(err) => Err(err.to_string()),
  109. }
  110. }
  111. }
  112. #[cfg(test)]
  113. mod tests {
  114. use alloc::{string::ToString, vec};
  115. use combine::Parser;
  116. use super::{ident, instruction, integer, operand, parse, register, Instruction, Operand};
  117. // Unit tests for the different kinds of parsers.
  118. #[test]
  119. fn test_ident() {
  120. assert_eq!(ident().parse("nop"), Ok(("nop".to_string(), "")));
  121. assert_eq!(ident().parse("add32"), Ok(("add32".to_string(), "")));
  122. assert_eq!(ident().parse("add32*"), Ok(("add32".to_string(), "*")));
  123. }
  124. #[test]
  125. fn test_integer() {
  126. assert_eq!(integer().parse("0"), Ok((0, "")));
  127. assert_eq!(integer().parse("42"), Ok((42, "")));
  128. assert_eq!(integer().parse("+42"), Ok((42, "")));
  129. assert_eq!(integer().parse("-42"), Ok((-42, "")));
  130. assert_eq!(integer().parse("0x0"), Ok((0, "")));
  131. assert_eq!(
  132. integer().parse("0x123456789abcdef0"),
  133. Ok((0x123456789abcdef0, ""))
  134. );
  135. assert_eq!(integer().parse("-0x1f"), Ok((-31, "")));
  136. }
  137. #[test]
  138. fn test_register() {
  139. assert_eq!(register().parse("r0"), Ok((0, "")));
  140. assert_eq!(register().parse("r15"), Ok((15, "")));
  141. }
  142. #[test]
  143. fn test_operand() {
  144. assert_eq!(operand().parse("r0"), Ok((Operand::Register(0), "")));
  145. assert_eq!(operand().parse("r15"), Ok((Operand::Register(15), "")));
  146. assert_eq!(operand().parse("0"), Ok((Operand::Integer(0), "")));
  147. assert_eq!(operand().parse("42"), Ok((Operand::Integer(42), "")));
  148. assert_eq!(operand().parse("[r1]"), Ok((Operand::Memory(1, 0), "")));
  149. assert_eq!(operand().parse("[r3+5]"), Ok((Operand::Memory(3, 5), "")));
  150. assert_eq!(
  151. operand().parse("[r3+0x1f]"),
  152. Ok((Operand::Memory(3, 31), ""))
  153. );
  154. assert_eq!(
  155. operand().parse("[r3-0x1f]"),
  156. Ok((Operand::Memory(3, -31), ""))
  157. );
  158. }
  159. #[test]
  160. fn test_instruction() {
  161. assert_eq!(
  162. instruction().parse("exit"),
  163. Ok((
  164. Instruction {
  165. name: "exit".to_string(),
  166. operands: vec![],
  167. },
  168. ""
  169. ))
  170. );
  171. assert_eq!(
  172. instruction().parse("call 2"),
  173. Ok((
  174. Instruction {
  175. name: "call".to_string(),
  176. operands: vec![Operand::Integer(2)],
  177. },
  178. ""
  179. ))
  180. );
  181. assert_eq!(
  182. instruction().parse("addi r1, 2"),
  183. Ok((
  184. Instruction {
  185. name: "addi".to_string(),
  186. operands: vec![Operand::Register(1), Operand::Integer(2)],
  187. },
  188. ""
  189. ))
  190. );
  191. assert_eq!(
  192. instruction().parse("ldxb r2, [r1+12]"),
  193. Ok((
  194. Instruction {
  195. name: "ldxb".to_string(),
  196. operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
  197. },
  198. ""
  199. ))
  200. );
  201. assert_eq!(
  202. instruction().parse("lsh r3, 0x8"),
  203. Ok((
  204. Instruction {
  205. name: "lsh".to_string(),
  206. operands: vec![Operand::Register(3), Operand::Integer(8)],
  207. },
  208. ""
  209. ))
  210. );
  211. assert_eq!(
  212. instruction().parse("jne r3, 0x8, +37"),
  213. Ok((
  214. Instruction {
  215. name: "jne".to_string(),
  216. operands: vec![
  217. Operand::Register(3),
  218. Operand::Integer(8),
  219. Operand::Integer(37)
  220. ],
  221. },
  222. ""
  223. ))
  224. );
  225. // Whitespace between operands is optional.
  226. assert_eq!(
  227. instruction().parse("jne r3,0x8,+37"),
  228. Ok((
  229. Instruction {
  230. name: "jne".to_string(),
  231. operands: vec![
  232. Operand::Register(3),
  233. Operand::Integer(8),
  234. Operand::Integer(37)
  235. ],
  236. },
  237. ""
  238. ))
  239. );
  240. }
    // Other unit tests: try to parse various sets of instructions.
  242. #[test]
  243. fn test_empty() {
  244. assert_eq!(parse(""), Ok(vec![]));
  245. }
  246. #[test]
  247. fn test_exit() {
  248. // No operands.
  249. assert_eq!(
  250. parse("exit"),
  251. Ok(vec![Instruction {
  252. name: "exit".to_string(),
  253. operands: vec![],
  254. }])
  255. );
  256. }
  257. #[test]
  258. fn test_lsh() {
  259. // Register and immediate operands.
  260. assert_eq!(
  261. parse("lsh r3, 0x20"),
  262. Ok(vec![Instruction {
  263. name: "lsh".to_string(),
  264. operands: vec![Operand::Register(3), Operand::Integer(0x20)],
  265. }])
  266. );
  267. }
  268. #[test]
  269. fn test_ja() {
  270. // Jump offset operand.
  271. assert_eq!(
  272. parse("ja +1"),
  273. Ok(vec![Instruction {
  274. name: "ja".to_string(),
  275. operands: vec![Operand::Integer(1)],
  276. }])
  277. );
  278. }
  279. #[test]
  280. fn test_ldxh() {
  281. // Register and memory operands.
  282. assert_eq!(
  283. parse("ldxh r4, [r1+12]"),
  284. Ok(vec![Instruction {
  285. name: "ldxh".to_string(),
  286. operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
  287. }])
  288. );
  289. }
  290. #[test]
  291. fn test_tcp_sack() {
  292. // Sample program from ubpf.
  293. // We could technically indent the instructions since the parser support white spaces at
  294. // the beginning, but there is another test for that.
  295. let src = "\
  296. ldxb r2, [r1+12]
  297. ldxb r3, [r1+13]
  298. lsh r3, 0x8
  299. or r3, r2
  300. mov r0, 0x0
  301. jne r3, 0x8, +37
  302. ldxb r2, [r1+23]
  303. jne r2, 0x6, +35
  304. ldxb r2, [r1+14]
  305. add r1, 0xe
  306. and r2, 0xf
  307. lsh r2, 0x2
  308. add r1, r2
  309. mov r0, 0x0
  310. ldxh r4, [r1+12]
  311. add r1, 0x14
  312. rsh r4, 0x2
  313. and r4, 0x3c
  314. mov r2, r4
  315. add r2, 0xffffffec
  316. mov r5, 0x15
  317. mov r3, 0x0
  318. jgt r5, r4, +20
  319. mov r5, r3
  320. lsh r5, 0x20
  321. arsh r5, 0x20
  322. mov r4, r1
  323. add r4, r5
  324. ldxb r5, [r4]
  325. jeq r5, 0x1, +4
  326. jeq r5, 0x0, +12
  327. mov r6, r3
  328. jeq r5, 0x5, +9
  329. ja +2
  330. add r3, 0x1
  331. mov r6, r3
  332. ldxb r3, [r4+1]
  333. add r3, r6
  334. lsh r3, 0x20
  335. arsh r3, 0x20
  336. jsgt r2, r3, -18
  337. ja +1
  338. mov r0, 0x1
  339. exit
  340. ";
  341. assert_eq!(
  342. parse(src),
  343. Ok(vec![
  344. Instruction {
  345. name: "ldxb".to_string(),
  346. operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
  347. },
  348. Instruction {
  349. name: "ldxb".to_string(),
  350. operands: vec![Operand::Register(3), Operand::Memory(1, 13)],
  351. },
  352. Instruction {
  353. name: "lsh".to_string(),
  354. operands: vec![Operand::Register(3), Operand::Integer(8)],
  355. },
  356. Instruction {
  357. name: "or".to_string(),
  358. operands: vec![Operand::Register(3), Operand::Register(2)],
  359. },
  360. Instruction {
  361. name: "mov".to_string(),
  362. operands: vec![Operand::Register(0), Operand::Integer(0)],
  363. },
  364. Instruction {
  365. name: "jne".to_string(),
  366. operands: vec![
  367. Operand::Register(3),
  368. Operand::Integer(8),
  369. Operand::Integer(37)
  370. ],
  371. },
  372. Instruction {
  373. name: "ldxb".to_string(),
  374. operands: vec![Operand::Register(2), Operand::Memory(1, 23)],
  375. },
  376. Instruction {
  377. name: "jne".to_string(),
  378. operands: vec![
  379. Operand::Register(2),
  380. Operand::Integer(6),
  381. Operand::Integer(35)
  382. ],
  383. },
  384. Instruction {
  385. name: "ldxb".to_string(),
  386. operands: vec![Operand::Register(2), Operand::Memory(1, 14)],
  387. },
  388. Instruction {
  389. name: "add".to_string(),
  390. operands: vec![Operand::Register(1), Operand::Integer(14)],
  391. },
  392. Instruction {
  393. name: "and".to_string(),
  394. operands: vec![Operand::Register(2), Operand::Integer(15)],
  395. },
  396. Instruction {
  397. name: "lsh".to_string(),
  398. operands: vec![Operand::Register(2), Operand::Integer(2)],
  399. },
  400. Instruction {
  401. name: "add".to_string(),
  402. operands: vec![Operand::Register(1), Operand::Register(2)],
  403. },
  404. Instruction {
  405. name: "mov".to_string(),
  406. operands: vec![Operand::Register(0), Operand::Integer(0)],
  407. },
  408. Instruction {
  409. name: "ldxh".to_string(),
  410. operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
  411. },
  412. Instruction {
  413. name: "add".to_string(),
  414. operands: vec![Operand::Register(1), Operand::Integer(20)],
  415. },
  416. Instruction {
  417. name: "rsh".to_string(),
  418. operands: vec![Operand::Register(4), Operand::Integer(2)],
  419. },
  420. Instruction {
  421. name: "and".to_string(),
  422. operands: vec![Operand::Register(4), Operand::Integer(60)],
  423. },
  424. Instruction {
  425. name: "mov".to_string(),
  426. operands: vec![Operand::Register(2), Operand::Register(4)],
  427. },
  428. Instruction {
  429. name: "add".to_string(),
  430. operands: vec![Operand::Register(2), Operand::Integer(4294967276)],
  431. },
  432. Instruction {
  433. name: "mov".to_string(),
  434. operands: vec![Operand::Register(5), Operand::Integer(21)],
  435. },
  436. Instruction {
  437. name: "mov".to_string(),
  438. operands: vec![Operand::Register(3), Operand::Integer(0)],
  439. },
  440. Instruction {
  441. name: "jgt".to_string(),
  442. operands: vec![
  443. Operand::Register(5),
  444. Operand::Register(4),
  445. Operand::Integer(20)
  446. ],
  447. },
  448. Instruction {
  449. name: "mov".to_string(),
  450. operands: vec![Operand::Register(5), Operand::Register(3)],
  451. },
  452. Instruction {
  453. name: "lsh".to_string(),
  454. operands: vec![Operand::Register(5), Operand::Integer(32)],
  455. },
  456. Instruction {
  457. name: "arsh".to_string(),
  458. operands: vec![Operand::Register(5), Operand::Integer(32)],
  459. },
  460. Instruction {
  461. name: "mov".to_string(),
  462. operands: vec![Operand::Register(4), Operand::Register(1)],
  463. },
  464. Instruction {
  465. name: "add".to_string(),
  466. operands: vec![Operand::Register(4), Operand::Register(5)],
  467. },
  468. Instruction {
  469. name: "ldxb".to_string(),
  470. operands: vec![Operand::Register(5), Operand::Memory(4, 0)],
  471. },
  472. Instruction {
  473. name: "jeq".to_string(),
  474. operands: vec![
  475. Operand::Register(5),
  476. Operand::Integer(1),
  477. Operand::Integer(4)
  478. ],
  479. },
  480. Instruction {
  481. name: "jeq".to_string(),
  482. operands: vec![
  483. Operand::Register(5),
  484. Operand::Integer(0),
  485. Operand::Integer(12)
  486. ],
  487. },
  488. Instruction {
  489. name: "mov".to_string(),
  490. operands: vec![Operand::Register(6), Operand::Register(3)],
  491. },
  492. Instruction {
  493. name: "jeq".to_string(),
  494. operands: vec![
  495. Operand::Register(5),
  496. Operand::Integer(5),
  497. Operand::Integer(9)
  498. ],
  499. },
  500. Instruction {
  501. name: "ja".to_string(),
  502. operands: vec![Operand::Integer(2)],
  503. },
  504. Instruction {
  505. name: "add".to_string(),
  506. operands: vec![Operand::Register(3), Operand::Integer(1)],
  507. },
  508. Instruction {
  509. name: "mov".to_string(),
  510. operands: vec![Operand::Register(6), Operand::Register(3)],
  511. },
  512. Instruction {
  513. name: "ldxb".to_string(),
  514. operands: vec![Operand::Register(3), Operand::Memory(4, 1)],
  515. },
  516. Instruction {
  517. name: "add".to_string(),
  518. operands: vec![Operand::Register(3), Operand::Register(6)],
  519. },
  520. Instruction {
  521. name: "lsh".to_string(),
  522. operands: vec![Operand::Register(3), Operand::Integer(32)],
  523. },
  524. Instruction {
  525. name: "arsh".to_string(),
  526. operands: vec![Operand::Register(3), Operand::Integer(32)],
  527. },
  528. Instruction {
  529. name: "jsgt".to_string(),
  530. operands: vec![
  531. Operand::Register(2),
  532. Operand::Register(3),
  533. Operand::Integer(-18)
  534. ],
  535. },
  536. Instruction {
  537. name: "ja".to_string(),
  538. operands: vec![Operand::Integer(1)],
  539. },
  540. Instruction {
  541. name: "mov".to_string(),
  542. operands: vec![Operand::Register(0), Operand::Integer(1)],
  543. },
  544. Instruction {
  545. name: "exit".to_string(),
  546. operands: vec![],
  547. }
  548. ])
  549. );
  550. }
  551. /// When running without `std` the `EasyParser` provided by `combine`
  552. /// cannot be used. Because of this we need to use the `Parser` and the
  553. /// error messages are different.
  554. #[test]
  555. fn test_error_eof() {
  556. let expected_error;
  557. #[cfg(feature = "std")]
  558. {
  559. expected_error = Err(
  560. "Parse error at line: 1, column: 6\nUnexpected end of input\nExpected digit\n"
  561. .to_string(),
  562. );
  563. }
  564. #[cfg(not(feature = "std"))]
  565. {
  566. expected_error = Err("unexpected parse".to_string());
  567. }
  568. // Unexpected end of input in a register name.
  569. assert_eq!(parse("lsh r"), expected_error);
  570. }
  571. /// When running without `std` the `EasyParser` provided by `combine`
  572. /// cannot be used. Because of this we need to use the `Parser` and the
  573. /// error messages are different.
  574. #[test]
  575. fn test_error_unexpected_character() {
  576. let expected_error;
  577. #[cfg(feature = "std")]
  578. {
  579. expected_error = Err(
  580. "Parse error at line: 2, column: 1\nUnexpected `^`\nExpected letter or digit, whitespaces, `r`, `-`, `+`, `[` or end of input\n".to_string()
  581. );
  582. }
  583. #[cfg(not(feature = "std"))]
  584. {
  585. expected_error = Err("unexpected parse".to_string());
  586. }
  587. // Unexpected character at end of input.
  588. assert_eq!(parse("exit\n^"), expected_error);
  589. }
  590. #[test]
  591. fn test_initial_whitespace() {
  592. assert_eq!(
  593. parse(
  594. "
  595. exit"
  596. ),
  597. Ok(vec![Instruction {
  598. name: "exit".to_string(),
  599. operands: vec![],
  600. }])
  601. );
  602. }
  603. }