4
0

asm_parser.rs 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. // Copyright 2017 Rich Lane <lanerl@gmail.com>
  2. //
  3. // Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
  4. // the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
  5. // copied, modified, or distributed except according to those terms.
  6. // Rust-doc comments were left in the module, but it is no longer publicly exposed from the root
  7. // file of the crate. Do not expect to find those comments in the documentation of the crate.
  8. //! This module parses eBPF assembly language source code.
  9. use combine::char::{alpha_num, char, digit, hex_digit, spaces, string};
  10. use combine::{between, eof, many, many1, one_of, optional, Parser, ParseError, ParseResult, parser,
  11. sep_by, try, State, Stream};
  12. use combine::primitives::{Error, Info};
  13. /// Operand of an instruction.
  14. #[derive(Clone, Copy, Debug, PartialEq)]
  15. pub enum Operand {
  16. /// Register number.
  17. Register(i64),
  18. /// Jump offset or immediate.
  19. Integer(i64),
  20. /// Register number and offset.
  21. Memory(i64, i64),
  22. /// Used for pattern matching.
  23. Nil,
  24. }
  25. /// Parsed instruction.
  26. #[derive(Debug, PartialEq)]
  27. pub struct Instruction {
  28. /// Instruction name.
  29. pub name: String,
  30. /// Operands.
  31. pub operands: Vec<Operand>,
  32. }
  33. fn ident<I>(input: I) -> ParseResult<String, I>
  34. where I: Stream<Item = char>
  35. {
  36. many1(alpha_num()).parse_stream(input)
  37. }
  38. fn integer<I>(input: I) -> ParseResult<i64, I>
  39. where I: Stream<Item = char>
  40. {
  41. let sign = optional(one_of("-+".chars())).map(|x| match x {
  42. Some('-') => -1,
  43. _ => 1,
  44. });
  45. let hex = string("0x")
  46. .with(many1(hex_digit()))
  47. .map(|x: String| u64::from_str_radix(&x, 16).unwrap() as i64);
  48. let dec = many1(digit()).map(|x: String| i64::from_str_radix(&x, 10).unwrap());
  49. (sign, try(hex).or(dec)).map(|(s, x)| s * x).parse_stream(input)
  50. }
  51. fn register<I>(input: I) -> ParseResult<i64, I>
  52. where I: Stream<Item = char>
  53. {
  54. char('r')
  55. .with(many1(digit()))
  56. .map(|x: String| i64::from_str_radix(&x, 10).unwrap())
  57. .parse_stream(input)
  58. }
  59. fn operand<I>(input: I) -> ParseResult<Operand, I>
  60. where I: Stream<Item = char>
  61. {
  62. let register_operand = parser(register).map(Operand::Register);
  63. let immediate = parser(integer).map(Operand::Integer);
  64. let memory = between(char('['),
  65. char(']'),
  66. (parser(register), optional(parser(integer))))
  67. .map(|t| Operand::Memory(t.0, t.1.unwrap_or(0)));
  68. register_operand.or(immediate).or(memory).parse_stream(input)
  69. }
  70. fn instruction<I>(input: I) -> ParseResult<Instruction, I>
  71. where I: Stream<Item = char>
  72. {
  73. let operands = sep_by(parser(operand), char(',').skip(spaces()));
  74. (parser(ident).skip(spaces()), operands, spaces())
  75. .map(|t| {
  76. Instruction {
  77. name: t.0,
  78. operands: t.1,
  79. }
  80. })
  81. .parse_stream(input)
  82. }
  83. fn format_info(info: &Info<char, &str>) -> String {
  84. match *info {
  85. Info::Token(x) => format!("{:?}", x),
  86. Info::Range(x) => format!("{:?}", x),
  87. Info::Owned(ref x) => x.clone(),
  88. Info::Borrowed(x) => x.to_string(),
  89. }
  90. }
  91. fn format_error(error: &Error<char, &str>) -> String {
  92. match *error {
  93. Error::Unexpected(ref x) => format!("unexpected {}", format_info(x)),
  94. Error::Expected(ref x) => format!("expected {}", format_info(x)),
  95. Error::Message(ref x) => format_info(x),
  96. Error::Other(ref x) => format!("{:?}", x),
  97. }
  98. }
  99. fn format_parse_error(parse_error: &ParseError<State<&str>>) -> String {
  100. format!("Parse error at line {} column {}: {}",
  101. parse_error.position.line,
  102. parse_error.position.column,
  103. parse_error.errors.iter().map(format_error).collect::<Vec<String>>().join(", "))
  104. }
  105. /// Parse a string into a list of instructions.
  106. ///
  107. /// The instructions are not validated and may have invalid names and operand types.
  108. pub fn parse(input: &str) -> Result<Vec<Instruction>, String> {
  109. match spaces().with(many(parser(instruction)).skip(eof())).parse(State::new(input)) {
  110. Ok((insts, _)) => Ok(insts),
  111. Err(err) => Err(format_parse_error(&err)),
  112. }
  113. }
  114. #[cfg(test)]
  115. mod tests {
  116. use combine::{Parser, parser};
  117. use super::{ident, integer, register, operand, instruction, Operand, Instruction, parse};
  118. // Unit tests for the different kinds of parsers.
  119. #[test]
  120. fn test_ident() {
  121. assert_eq!(parser(ident).parse("nop"), Ok(("nop".to_string(), "")));
  122. assert_eq!(parser(ident).parse("add32"), Ok(("add32".to_string(), "")));
  123. assert_eq!(parser(ident).parse("add32*"),
  124. Ok(("add32".to_string(), "*")));
  125. }
  126. #[test]
  127. fn test_integer() {
  128. assert_eq!(parser(integer).parse("0"), Ok((0, "")));
  129. assert_eq!(parser(integer).parse("42"), Ok((42, "")));
  130. assert_eq!(parser(integer).parse("+42"), Ok((42, "")));
  131. assert_eq!(parser(integer).parse("-42"), Ok((-42, "")));
  132. assert_eq!(parser(integer).parse("0x0"), Ok((0, "")));
  133. assert_eq!(parser(integer).parse("0x123456789abcdef0"),
  134. Ok((0x123456789abcdef0, "")));
  135. assert_eq!(parser(integer).parse("-0x1f"), Ok((-31, "")));
  136. }
  137. #[test]
  138. fn test_register() {
  139. assert_eq!(parser(register).parse("r0"), Ok((0, "")));
  140. assert_eq!(parser(register).parse("r15"), Ok((15, "")));
  141. }
  142. #[test]
  143. fn test_operand() {
  144. assert_eq!(parser(operand).parse("r0"), Ok((Operand::Register(0), "")));
  145. assert_eq!(parser(operand).parse("r15"),
  146. Ok((Operand::Register(15), "")));
  147. assert_eq!(parser(operand).parse("0"), Ok((Operand::Integer(0), "")));
  148. assert_eq!(parser(operand).parse("42"), Ok((Operand::Integer(42), "")));
  149. assert_eq!(parser(operand).parse("[r1]"),
  150. Ok((Operand::Memory(1, 0), "")));
  151. assert_eq!(parser(operand).parse("[r3+5]"),
  152. Ok((Operand::Memory(3, 5), "")));
  153. assert_eq!(parser(operand).parse("[r3+0x1f]"),
  154. Ok((Operand::Memory(3, 31), "")));
  155. assert_eq!(parser(operand).parse("[r3-0x1f]"),
  156. Ok((Operand::Memory(3, -31), "")));
  157. }
  158. #[test]
  159. fn test_instruction() {
  160. assert_eq!(parser(instruction).parse("exit"),
  161. Ok((Instruction {
  162. name: "exit".to_string(),
  163. operands: vec![],
  164. },
  165. "")));
  166. assert_eq!(parser(instruction).parse("call 2"),
  167. Ok((Instruction {
  168. name: "call".to_string(),
  169. operands: vec![Operand::Integer(2)],
  170. },
  171. "")));
  172. assert_eq!(parser(instruction).parse("addi r1, 2"),
  173. Ok((Instruction {
  174. name: "addi".to_string(),
  175. operands: vec![Operand::Register(1), Operand::Integer(2)],
  176. },
  177. "")));
  178. assert_eq!(parser(instruction).parse("ldxb r2, [r1+12]"),
  179. Ok((Instruction {
  180. name: "ldxb".to_string(),
  181. operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
  182. },
  183. "")));
  184. assert_eq!(parser(instruction).parse("lsh r3, 0x8"),
  185. Ok((Instruction {
  186. name: "lsh".to_string(),
  187. operands: vec![Operand::Register(3), Operand::Integer(8)],
  188. },
  189. "")));
  190. assert_eq!(parser(instruction).parse("jne r3, 0x8, +37"),
  191. Ok((Instruction {
  192. name: "jne".to_string(),
  193. operands: vec![Operand::Register(3),
  194. Operand::Integer(8),
  195. Operand::Integer(37)],
  196. },
  197. "")));
  198. // Whitespace between operands is optional.
  199. assert_eq!(parser(instruction).parse("jne r3,0x8,+37"),
  200. Ok((Instruction {
  201. name: "jne".to_string(),
  202. operands: vec![Operand::Register(3),
  203. Operand::Integer(8),
  204. Operand::Integer(37)],
  205. },
  206. "")));
  207. }
    // Other unit tests: try to parse various sets of instructions.
  209. #[test]
  210. fn test_empty() {
  211. assert_eq!(parse(""), Ok(vec![]));
  212. }
  213. #[test]
  214. fn test_exit() {
  215. // No operands.
  216. assert_eq!(parse("exit"),
  217. Ok(vec![Instruction {
  218. name: "exit".to_string(),
  219. operands: vec![],
  220. }]));
  221. }
  222. #[test]
  223. fn test_lsh() {
  224. // Register and immediate operands.
  225. assert_eq!(parse("lsh r3, 0x20"),
  226. Ok(vec![Instruction {
  227. name: "lsh".to_string(),
  228. operands: vec![Operand::Register(3), Operand::Integer(0x20)],
  229. }]));
  230. }
  231. #[test]
  232. fn test_ja() {
  233. // Jump offset operand.
  234. assert_eq!(parse("ja +1"),
  235. Ok(vec![Instruction {
  236. name: "ja".to_string(),
  237. operands: vec![Operand::Integer(1)],
  238. }]));
  239. }
  240. #[test]
  241. fn test_ldxh() {
  242. // Register and memory operands.
  243. assert_eq!(parse("ldxh r4, [r1+12]"),
  244. Ok(vec![Instruction {
  245. name: "ldxh".to_string(),
  246. operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
  247. }]));
  248. }
  249. #[test]
  250. fn test_tcp_sack() {
  251. // Sample program from ubpf.
  252. // We could technically indent the instructions since the parser support white spaces at
  253. // the beginning, but there is another test for that.
  254. let src = "\
  255. ldxb r2, [r1+12]
  256. ldxb r3, [r1+13]
  257. lsh r3, 0x8
  258. or r3, r2
  259. mov r0, 0x0
  260. jne r3, 0x8, +37
  261. ldxb r2, [r1+23]
  262. jne r2, 0x6, +35
  263. ldxb r2, [r1+14]
  264. add r1, 0xe
  265. and r2, 0xf
  266. lsh r2, 0x2
  267. add r1, r2
  268. mov r0, 0x0
  269. ldxh r4, [r1+12]
  270. add r1, 0x14
  271. rsh r4, 0x2
  272. and r4, 0x3c
  273. mov r2, r4
  274. add r2, 0xffffffec
  275. mov r5, 0x15
  276. mov r3, 0x0
  277. jgt r5, r4, +20
  278. mov r5, r3
  279. lsh r5, 0x20
  280. arsh r5, 0x20
  281. mov r4, r1
  282. add r4, r5
  283. ldxb r5, [r4]
  284. jeq r5, 0x1, +4
  285. jeq r5, 0x0, +12
  286. mov r6, r3
  287. jeq r5, 0x5, +9
  288. ja +2
  289. add r3, 0x1
  290. mov r6, r3
  291. ldxb r3, [r4+1]
  292. add r3, r6
  293. lsh r3, 0x20
  294. arsh r3, 0x20
  295. jsgt r2, r3, -18
  296. ja +1
  297. mov r0, 0x1
  298. exit
  299. ";
  300. assert_eq!(parse(src),
  301. Ok(vec![Instruction {
  302. name: "ldxb".to_string(),
  303. operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
  304. },
  305. Instruction {
  306. name: "ldxb".to_string(),
  307. operands: vec![Operand::Register(3), Operand::Memory(1, 13)],
  308. },
  309. Instruction {
  310. name: "lsh".to_string(),
  311. operands: vec![Operand::Register(3), Operand::Integer(8)],
  312. },
  313. Instruction {
  314. name: "or".to_string(),
  315. operands: vec![Operand::Register(3), Operand::Register(2)],
  316. },
  317. Instruction {
  318. name: "mov".to_string(),
  319. operands: vec![Operand::Register(0), Operand::Integer(0)],
  320. },
  321. Instruction {
  322. name: "jne".to_string(),
  323. operands: vec![Operand::Register(3),
  324. Operand::Integer(8),
  325. Operand::Integer(37)],
  326. },
  327. Instruction {
  328. name: "ldxb".to_string(),
  329. operands: vec![Operand::Register(2), Operand::Memory(1, 23)],
  330. },
  331. Instruction {
  332. name: "jne".to_string(),
  333. operands: vec![Operand::Register(2),
  334. Operand::Integer(6),
  335. Operand::Integer(35)],
  336. },
  337. Instruction {
  338. name: "ldxb".to_string(),
  339. operands: vec![Operand::Register(2), Operand::Memory(1, 14)],
  340. },
  341. Instruction {
  342. name: "add".to_string(),
  343. operands: vec![Operand::Register(1), Operand::Integer(14)],
  344. },
  345. Instruction {
  346. name: "and".to_string(),
  347. operands: vec![Operand::Register(2), Operand::Integer(15)],
  348. },
  349. Instruction {
  350. name: "lsh".to_string(),
  351. operands: vec![Operand::Register(2), Operand::Integer(2)],
  352. },
  353. Instruction {
  354. name: "add".to_string(),
  355. operands: vec![Operand::Register(1), Operand::Register(2)],
  356. },
  357. Instruction {
  358. name: "mov".to_string(),
  359. operands: vec![Operand::Register(0), Operand::Integer(0)],
  360. },
  361. Instruction {
  362. name: "ldxh".to_string(),
  363. operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
  364. },
  365. Instruction {
  366. name: "add".to_string(),
  367. operands: vec![Operand::Register(1), Operand::Integer(20)],
  368. },
  369. Instruction {
  370. name: "rsh".to_string(),
  371. operands: vec![Operand::Register(4), Operand::Integer(2)],
  372. },
  373. Instruction {
  374. name: "and".to_string(),
  375. operands: vec![Operand::Register(4), Operand::Integer(60)],
  376. },
  377. Instruction {
  378. name: "mov".to_string(),
  379. operands: vec![Operand::Register(2), Operand::Register(4)],
  380. },
  381. Instruction {
  382. name: "add".to_string(),
  383. operands: vec![Operand::Register(2), Operand::Integer(4294967276)],
  384. },
  385. Instruction {
  386. name: "mov".to_string(),
  387. operands: vec![Operand::Register(5), Operand::Integer(21)],
  388. },
  389. Instruction {
  390. name: "mov".to_string(),
  391. operands: vec![Operand::Register(3), Operand::Integer(0)],
  392. },
  393. Instruction {
  394. name: "jgt".to_string(),
  395. operands: vec![Operand::Register(5),
  396. Operand::Register(4),
  397. Operand::Integer(20)],
  398. },
  399. Instruction {
  400. name: "mov".to_string(),
  401. operands: vec![Operand::Register(5), Operand::Register(3)],
  402. },
  403. Instruction {
  404. name: "lsh".to_string(),
  405. operands: vec![Operand::Register(5), Operand::Integer(32)],
  406. },
  407. Instruction {
  408. name: "arsh".to_string(),
  409. operands: vec![Operand::Register(5), Operand::Integer(32)],
  410. },
  411. Instruction {
  412. name: "mov".to_string(),
  413. operands: vec![Operand::Register(4), Operand::Register(1)],
  414. },
  415. Instruction {
  416. name: "add".to_string(),
  417. operands: vec![Operand::Register(4), Operand::Register(5)],
  418. },
  419. Instruction {
  420. name: "ldxb".to_string(),
  421. operands: vec![Operand::Register(5), Operand::Memory(4, 0)],
  422. },
  423. Instruction {
  424. name: "jeq".to_string(),
  425. operands: vec![Operand::Register(5),
  426. Operand::Integer(1),
  427. Operand::Integer(4)],
  428. },
  429. Instruction {
  430. name: "jeq".to_string(),
  431. operands: vec![Operand::Register(5),
  432. Operand::Integer(0),
  433. Operand::Integer(12)],
  434. },
  435. Instruction {
  436. name: "mov".to_string(),
  437. operands: vec![Operand::Register(6), Operand::Register(3)],
  438. },
  439. Instruction {
  440. name: "jeq".to_string(),
  441. operands: vec![Operand::Register(5),
  442. Operand::Integer(5),
  443. Operand::Integer(9)],
  444. },
  445. Instruction {
  446. name: "ja".to_string(),
  447. operands: vec![Operand::Integer(2)],
  448. },
  449. Instruction {
  450. name: "add".to_string(),
  451. operands: vec![Operand::Register(3), Operand::Integer(1)],
  452. },
  453. Instruction {
  454. name: "mov".to_string(),
  455. operands: vec![Operand::Register(6), Operand::Register(3)],
  456. },
  457. Instruction {
  458. name: "ldxb".to_string(),
  459. operands: vec![Operand::Register(3), Operand::Memory(4, 1)],
  460. },
  461. Instruction {
  462. name: "add".to_string(),
  463. operands: vec![Operand::Register(3), Operand::Register(6)],
  464. },
  465. Instruction {
  466. name: "lsh".to_string(),
  467. operands: vec![Operand::Register(3), Operand::Integer(32)],
  468. },
  469. Instruction {
  470. name: "arsh".to_string(),
  471. operands: vec![Operand::Register(3), Operand::Integer(32)],
  472. },
  473. Instruction {
  474. name: "jsgt".to_string(),
  475. operands: vec![Operand::Register(2),
  476. Operand::Register(3),
  477. Operand::Integer(-18)],
  478. },
  479. Instruction {
  480. name: "ja".to_string(),
  481. operands: vec![Operand::Integer(1)],
  482. },
  483. Instruction {
  484. name: "mov".to_string(),
  485. operands: vec![Operand::Register(0), Operand::Integer(1)],
  486. },
  487. Instruction {
  488. name: "exit".to_string(),
  489. operands: vec![],
  490. }]));
  491. }
  492. #[test]
  493. fn test_error_eof() {
  494. // Unexpected end of input in a register name.
  495. assert_eq!(parse("lsh r"),
  496. Err("Parse error at line 1 column 6: unexpected end of input, expected digit"
  497. .to_string()));
  498. }
  499. #[test]
  500. fn test_error_unexpected_character() {
  501. // Unexpected character at end of input.
  502. assert_eq!(parse("exit\n^"),
  503. Err("Parse error at line 2 column 1: unexpected '^', expected end of input"
  504. .to_string()));
  505. }
  506. #[test]
  507. fn test_initial_whitespace() {
  508. assert_eq!(parse("
  509. exit"),
  510. Ok(vec![Instruction {
  511. name: "exit".to_string(),
  512. operands: vec![],
  513. }]));
  514. }
  515. }