parser.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. use crate::{pkg_length::PkgLength, AmlContext, AmlError, DebugVerbosity};
  2. use alloc::vec::Vec;
  3. use core::marker::PhantomData;
  4. use log::trace;
  5. pub type ParseResult<'a, 'c, R> =
  6. Result<(&'a [u8], &'c mut AmlContext, R), (&'a [u8], &'c mut AmlContext, AmlError)>;
  7. pub trait Parser<'a, 'c, R>: Sized
  8. where
  9. 'c: 'a,
  10. {
  11. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R>;
  12. fn map<F, A>(self, map_fn: F) -> Map<'a, 'c, Self, F, R, A>
  13. where
  14. F: Fn(R) -> Result<A, AmlError>,
  15. {
  16. Map { parser: self, map_fn, _phantom: PhantomData }
  17. }
  18. fn map_with_context<F, A>(self, map_fn: F) -> MapWithContext<'a, 'c, Self, F, R, A>
  19. where
  20. F: Fn(R, &'c mut AmlContext) -> (Result<A, AmlError>, &'c mut AmlContext),
  21. {
  22. MapWithContext { parser: self, map_fn, _phantom: PhantomData }
  23. }
  24. fn discard_result(self) -> DiscardResult<'a, 'c, Self, R> {
  25. DiscardResult { parser: self, _phantom: PhantomData }
  26. }
  27. /// Try parsing with `self`. If it succeeds, return its result. If it returns `AmlError::WrongParser`, try
  28. /// parsing with `other`, returning the result of that parser in all cases. Other errors from the first
  29. /// parser are propagated without attempting the second parser. To chain more than two parsers using
  30. /// `or`, see the `choice!` macro.
  31. fn or<OtherParser>(self, other: OtherParser) -> Or<'a, 'c, Self, OtherParser, R>
  32. where
  33. OtherParser: Parser<'a, 'c, R>,
  34. {
  35. Or { p1: self, p2: other, _phantom: PhantomData }
  36. }
  37. fn then<NextParser, NextR>(self, next: NextParser) -> Then<'a, 'c, Self, NextParser, R, NextR>
  38. where
  39. NextParser: Parser<'a, 'c, NextR>,
  40. {
  41. Then { p1: self, p2: next, _phantom: PhantomData }
  42. }
  43. /// `feed` takes a function that takes the result of this parser (`self`) and creates another
  44. /// parser, which is then used to parse the next part of the stream. This sounds convoluted,
  45. /// but is useful for when the next parser's behaviour depends on a property of the result of
  46. /// the first (e.g. the first parser might parse a length `n`, and the second parser then
  47. /// consumes `n` bytes).
  48. fn feed<F, P2, R2>(self, producer_fn: F) -> Feed<'a, 'c, Self, P2, F, R, R2>
  49. where
  50. P2: Parser<'a, 'c, R2>,
  51. F: Fn(R) -> P2,
  52. {
  53. Feed { parser: self, producer_fn, _phantom: PhantomData }
  54. }
  55. }
  56. impl<'a, 'c, F, R> Parser<'a, 'c, R> for F
  57. where
  58. 'c: 'a,
  59. F: Fn(&'a [u8], &'c mut AmlContext) -> ParseResult<'a, 'c, R>,
  60. {
  61. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R> {
  62. self(input, context)
  63. }
  64. }
  65. /// The identity parser - returns the stream and context unchanged. Useful for producing parsers
  66. /// that produce a result without parsing anything by doing: `id().map(|()| Ok(foo))`.
  67. pub fn id<'a, 'c>() -> impl Parser<'a, 'c, ()>
  68. where
  69. 'c: 'a,
  70. {
  71. move |input: &'a [u8], context: &'c mut AmlContext| Ok((input, context, ()))
  72. }
  73. pub fn take<'a, 'c>() -> impl Parser<'a, 'c, u8>
  74. where
  75. 'c: 'a,
  76. {
  77. move |input: &'a [u8], context: &'c mut AmlContext| match input.first() {
  78. Some(&byte) => Ok((&input[1..], context, byte)),
  79. None => Err((input, context, AmlError::UnexpectedEndOfStream)),
  80. }
  81. }
  82. pub fn take_u16<'a, 'c>() -> impl Parser<'a, 'c, u16>
  83. where
  84. 'c: 'a,
  85. {
  86. move |input: &'a [u8], context: &'c mut AmlContext| {
  87. if input.len() < 2 {
  88. return Err((input, context, AmlError::UnexpectedEndOfStream));
  89. }
  90. Ok((&input[2..], context, input[0] as u16 + ((input[1] as u16) << 8)))
  91. }
  92. }
  93. pub fn take_u32<'a, 'c>() -> impl Parser<'a, 'c, u32>
  94. where
  95. 'c: 'a,
  96. {
  97. move |input: &'a [u8], context: &'c mut AmlContext| {
  98. if input.len() < 4 {
  99. return Err((input, context, AmlError::UnexpectedEndOfStream));
  100. }
  101. Ok((
  102. &input[4..],
  103. context,
  104. input[0] as u32 + ((input[1] as u32) << 8) + ((input[2] as u32) << 16) + ((input[3] as u32) << 24),
  105. ))
  106. }
  107. }
  108. pub fn take_u64<'a, 'c>() -> impl Parser<'a, 'c, u64>
  109. where
  110. 'c: 'a,
  111. {
  112. move |input: &'a [u8], context: &'c mut AmlContext| {
  113. if input.len() < 8 {
  114. return Err((input, context, AmlError::UnexpectedEndOfStream));
  115. }
  116. Ok((
  117. &input[8..],
  118. context,
  119. input[0] as u64
  120. + ((input[1] as u64) << 8)
  121. + ((input[2] as u64) << 16)
  122. + ((input[3] as u64) << 24)
  123. + ((input[4] as u64) << 32)
  124. + ((input[5] as u64) << 40)
  125. + ((input[6] as u64) << 48)
  126. + ((input[7] as u64) << 56),
  127. ))
  128. }
  129. }
  130. pub fn take_n<'a, 'c>(n: u32) -> impl Parser<'a, 'c, &'a [u8]>
  131. where
  132. 'c: 'a,
  133. {
  134. move |input: &'a [u8], context| {
  135. if (input.len() as u32) < n {
  136. return Err((input, context, AmlError::UnexpectedEndOfStream));
  137. }
  138. let (result, new_input) = input.split_at(n as usize);
  139. Ok((new_input, context, result))
  140. }
  141. }
  142. pub fn take_to_end_of_pkglength<'a, 'c>(length: PkgLength) -> impl Parser<'a, 'c, &'a [u8]>
  143. where
  144. 'c: 'a,
  145. {
  146. move |input: &'a [u8], context| {
  147. let bytes_to_take = (input.len() as u32) - length.end_offset;
  148. take_n(bytes_to_take).parse(input, context)
  149. }
  150. }
  151. pub fn n_of<'a, 'c, P, R>(parser: P, n: usize) -> impl Parser<'a, 'c, Vec<R>>
  152. where
  153. 'c: 'a,
  154. P: Parser<'a, 'c, R>,
  155. {
  156. // TODO: can we write this more nicely?
  157. move |mut input, mut context| {
  158. let mut results = Vec::with_capacity(n);
  159. for _ in 0..n {
  160. let (new_input, new_context, result) = match parser.parse(input, context) {
  161. Ok((input, context, result)) => (input, context, result),
  162. Err((_, context, err)) => return Err((input, context, err)),
  163. };
  164. results.push(result);
  165. input = new_input;
  166. context = new_context;
  167. }
  168. Ok((input, context, results))
  169. }
  170. }
  171. pub fn take_while<'a, 'c, P, R>(parser: P) -> impl Parser<'a, 'c, usize>
  172. where
  173. 'c: 'a,
  174. P: Parser<'a, 'c, R>,
  175. {
  176. move |mut input: &'a [u8], mut context: &'c mut AmlContext| {
  177. let mut num_passed = 0;
  178. loop {
  179. match parser.parse(input, context) {
  180. Ok((new_input, new_context, _)) => {
  181. input = new_input;
  182. context = new_context;
  183. num_passed += 1;
  184. }
  185. Err((_, context, AmlError::WrongParser)) => return Ok((input, context, num_passed)),
  186. Err((_, context, err)) => return Err((input, context, err)),
  187. }
  188. }
  189. }
  190. }
  191. pub fn consume<'a, 'c, F>(condition: F) -> impl Parser<'a, 'c, u8>
  192. where
  193. 'c: 'a,
  194. F: Fn(u8) -> bool,
  195. {
  196. move |input: &'a [u8], context: &'c mut AmlContext| match input.first() {
  197. Some(&byte) if condition(byte) => Ok((&input[1..], context, byte)),
  198. Some(&byte) => Err((input, context, AmlError::UnexpectedByte(byte))),
  199. None => Err((input, context, AmlError::UnexpectedEndOfStream)),
  200. }
  201. }
  202. pub fn comment_scope<'a, 'c, P, R>(
  203. verbosity: DebugVerbosity,
  204. scope_name: &'a str,
  205. parser: P,
  206. ) -> impl Parser<'a, 'c, R>
  207. where
  208. 'c: 'a,
  209. R: core::fmt::Debug,
  210. P: Parser<'a, 'c, R>,
  211. {
  212. const INDENT_PER_SCOPE: usize = 2;
  213. move |input, context: &'c mut AmlContext| {
  214. if verbosity <= context.debug_verbosity {
  215. trace!("{:indent$}--> {}", "", scope_name, indent = context.scope_indent);
  216. }
  217. // Return if the parse fails, so we don't print the tail. Makes it easier to debug.
  218. context.scope_indent += INDENT_PER_SCOPE;
  219. let (new_input, context, result) = parser.parse(input, context)?;
  220. context.scope_indent -= INDENT_PER_SCOPE;
  221. if verbosity <= context.debug_verbosity {
  222. trace!("{:indent$}<-- {}", "", scope_name, indent = context.scope_indent);
  223. }
  224. Ok((new_input, context, result))
  225. }
  226. }
  227. pub struct Or<'a, 'c, P1, P2, R>
  228. where
  229. 'c: 'a,
  230. P1: Parser<'a, 'c, R>,
  231. P2: Parser<'a, 'c, R>,
  232. {
  233. p1: P1,
  234. p2: P2,
  235. _phantom: PhantomData<(&'a R, &'c ())>,
  236. }
  237. impl<'a, 'c, P1, P2, R> Parser<'a, 'c, R> for Or<'a, 'c, P1, P2, R>
  238. where
  239. 'c: 'a,
  240. P1: Parser<'a, 'c, R>,
  241. P2: Parser<'a, 'c, R>,
  242. {
  243. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R> {
  244. match self.p1.parse(input, context) {
  245. Ok(parse_result) => Ok(parse_result),
  246. Err((_, context, AmlError::WrongParser)) => self.p2.parse(input, context),
  247. Err((_, context, err)) => Err((input, context, err)),
  248. }
  249. }
  250. }
  251. pub struct Map<'a, 'c, P, F, R, A>
  252. where
  253. 'c: 'a,
  254. P: Parser<'a, 'c, R>,
  255. F: Fn(R) -> Result<A, AmlError>,
  256. {
  257. parser: P,
  258. map_fn: F,
  259. _phantom: PhantomData<(&'a (R, A), &'c ())>,
  260. }
  261. impl<'a, 'c, P, F, R, A> Parser<'a, 'c, A> for Map<'a, 'c, P, F, R, A>
  262. where
  263. 'c: 'a,
  264. P: Parser<'a, 'c, R>,
  265. F: Fn(R) -> Result<A, AmlError>,
  266. {
  267. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, A> {
  268. match self.parser.parse(input, context) {
  269. Ok((new_input, context, result)) => match (self.map_fn)(result) {
  270. Ok(result_value) => Ok((new_input, context, result_value)),
  271. Err(err) => Err((input, context, err)),
  272. },
  273. Err(result) => Err(result),
  274. }
  275. }
  276. }
  277. pub struct MapWithContext<'a, 'c, P, F, R, A>
  278. where
  279. 'c: 'a,
  280. P: Parser<'a, 'c, R>,
  281. F: Fn(R, &'c mut AmlContext) -> (Result<A, AmlError>, &'c mut AmlContext),
  282. {
  283. parser: P,
  284. map_fn: F,
  285. _phantom: PhantomData<(&'a (R, A), &'c ())>,
  286. }
  287. impl<'a, 'c, P, F, R, A> Parser<'a, 'c, A> for MapWithContext<'a, 'c, P, F, R, A>
  288. where
  289. 'c: 'a,
  290. P: Parser<'a, 'c, R>,
  291. F: Fn(R, &'c mut AmlContext) -> (Result<A, AmlError>, &'c mut AmlContext),
  292. {
  293. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, A> {
  294. match self.parser.parse(input, context) {
  295. Ok((new_input, context, result)) => match (self.map_fn)(result, context) {
  296. (Ok(result_value), context) => Ok((new_input, context, result_value)),
  297. (Err(err), context) => Err((input, context, err)),
  298. },
  299. Err(result) => Err(result),
  300. }
  301. }
  302. }
  303. pub struct DiscardResult<'a, 'c, P, R>
  304. where
  305. 'c: 'a,
  306. P: Parser<'a, 'c, R>,
  307. {
  308. parser: P,
  309. _phantom: PhantomData<(&'a R, &'c ())>,
  310. }
  311. impl<'a, 'c, P, R> Parser<'a, 'c, ()> for DiscardResult<'a, 'c, P, R>
  312. where
  313. 'c: 'a,
  314. P: Parser<'a, 'c, R>,
  315. {
  316. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, ()> {
  317. self.parser.parse(input, context).map(|(new_input, new_context, _)| (new_input, new_context, ()))
  318. }
  319. }
  320. pub struct Then<'a, 'c, P1, P2, R1, R2>
  321. where
  322. 'c: 'a,
  323. P1: Parser<'a, 'c, R1>,
  324. P2: Parser<'a, 'c, R2>,
  325. {
  326. p1: P1,
  327. p2: P2,
  328. _phantom: PhantomData<(&'a (R1, R2), &'c ())>,
  329. }
  330. impl<'a, 'c, P1, P2, R1, R2> Parser<'a, 'c, (R1, R2)> for Then<'a, 'c, P1, P2, R1, R2>
  331. where
  332. 'c: 'a,
  333. P1: Parser<'a, 'c, R1>,
  334. P2: Parser<'a, 'c, R2>,
  335. {
  336. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, (R1, R2)> {
  337. self.p1.parse(input, context).and_then(|(next_input, context, result_a)| {
  338. self.p2
  339. .parse(next_input, context)
  340. .map(|(final_input, context, result_b)| (final_input, context, (result_a, result_b)))
  341. })
  342. }
  343. }
  344. pub struct Feed<'a, 'c, P1, P2, F, R1, R2>
  345. where
  346. 'c: 'a,
  347. P1: Parser<'a, 'c, R1>,
  348. P2: Parser<'a, 'c, R2>,
  349. F: Fn(R1) -> P2,
  350. {
  351. parser: P1,
  352. producer_fn: F,
  353. _phantom: PhantomData<(&'a (R1, R2), &'c ())>,
  354. }
  355. impl<'a, 'c, P1, P2, F, R1, R2> Parser<'a, 'c, R2> for Feed<'a, 'c, P1, P2, F, R1, R2>
  356. where
  357. 'c: 'a,
  358. P1: Parser<'a, 'c, R1>,
  359. P2: Parser<'a, 'c, R2>,
  360. F: Fn(R1) -> P2,
  361. {
  362. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R2> {
  363. let (input, context, first_result) = self.parser.parse(input, context)?;
  364. // We can now produce the second parser, and parse using that.
  365. let second_parser = (self.producer_fn)(first_result);
  366. second_parser.parse(input, context)
  367. }
  368. }
  369. /// Takes a number of parsers, and tries to apply each one to the input in order. Returns the
  370. /// result of the first one that succeeds, or fails if all of them fail.
  371. pub(crate) macro choice {
  372. () => {
  373. id().map(|()| Err(AmlError::WrongParser))
  374. },
  375. ($first_parser: expr) => {
  376. $first_parser
  377. .or(id().map(|()| Err(AmlError::WrongParser)))
  378. },
  379. ($first_parser: expr, $($other_parser: expr),*) => {
  380. $first_parser
  381. $(
  382. .or($other_parser)
  383. )*
  384. .or(id().map(|()| Err(AmlError::WrongParser)))
  385. }
  386. }
  387. /// This encapsulates an unfortunate hack we sometimes need to use, where the type checker gets
  388. /// caught in an infinite loop of parser types. This occurs when an object can indirectly contain
  389. /// itself, and so the parser type will contain its own type. This works by breaking the cycle of
  390. /// `impl Parser` chains that build up, by effectively creating a "concrete" closure type.
  391. ///
  392. /// You can try using this hack if you are writing a parser and end up with an error of the form:
  393. /// `error[E0275]: overflow evaluating the requirement 'impl Parser<{a type}>'
  394. /// help: consider adding a a '#![recursion_limit="128"] attribute to your crate`
  395. /// Note: Increasing the recursion limit will not fix the issue, as the cycle will just continue
  396. /// until you either hit the new recursion limit or `rustc` overflows its stack.
  397. pub(crate) macro make_parser_concrete($parser: expr) {
  398. |input, context| ($parser).parse(input, context)
  399. }
  400. /// Helper macro for use within `map_with_context` as an alternative to "trying" an expression.
  401. ///
  402. /// ### Example
  403. /// Problem: `expr?` won't work because the expected return type is `(Result<R, AmlError>, &mut AmlContext)`
  404. /// Solution: use `try_with_context!(context, expr)` instead.
  405. pub(crate) macro try_with_context($context: expr, $expr: expr) {
  406. match $expr {
  407. Ok(result) => result,
  408. Err(err) => return (Err(err), $context),
  409. }
  410. }
  #[cfg(test)]
  mod tests {
      use super::*;
      use crate::test_utils::*;

      /// Exercises `take_n` on inputs that are too short, exactly long enough, and longer than
      /// requested.
      #[test]
      fn test_take_n() {
          let mut ctx = AmlContext::new();

          // Too little input.
          check_err!(take_n(1).parse(&[], &mut ctx), AmlError::UnexpectedEndOfStream, &[]);
          check_err!(take_n(2).parse(&[0xf5], &mut ctx), AmlError::UnexpectedEndOfStream, &[0xf5]);

          // Enough input: the requested bytes are taken and the rest is left over.
          check_ok!(take_n(1).parse(&[0xff], &mut ctx), &[0xff], &[]);
          check_ok!(take_n(1).parse(&[0xff, 0xf8], &mut ctx), &[0xff], &[0xf8]);
          check_ok!(take_n(2).parse(&[0xff, 0xf8], &mut ctx), &[0xff, 0xf8], &[]);
      }

      /// Exercises the fixed-width integer parsers: each must reject short input and decode the
      /// bytes as a little-endian value.
      #[test]
      fn test_take_ux() {
          let mut ctx = AmlContext::new();

          // u16
          check_err!(take_u16().parse(&[0x34], &mut ctx), AmlError::UnexpectedEndOfStream, &[0x34]);
          check_ok!(take_u16().parse(&[0x34, 0x12], &mut ctx), 0x1234, &[]);

          // u32
          check_err!(take_u32().parse(&[0x34, 0x12], &mut ctx), AmlError::UnexpectedEndOfStream, &[0x34, 0x12]);
          check_ok!(take_u32().parse(&[0x34, 0x12, 0xf4, 0xc3, 0x3e], &mut ctx), 0xc3f41234, &[0x3e]);

          // u64
          check_err!(take_u64().parse(&[0x34], &mut ctx), AmlError::UnexpectedEndOfStream, &[0x34]);
          check_ok!(
              take_u64().parse(&[0x34, 0x12, 0x35, 0x76, 0xd4, 0x43, 0xa3, 0xb6, 0xff, 0x00], &mut ctx),
              0xb6a343d476351234,
              &[0xff, 0x00]
          );
      }
  }