parser.rs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. use crate::{pkg_length::PkgLength, AmlContext, AmlError, DebugVerbosity};
  2. use alloc::vec::Vec;
  3. use core::marker::PhantomData;
  4. use log::trace;
  /// Number of spaces by which parser debug messages are indented for each nested scope.
  pub const INDENT_PER_SCOPE: usize = 2;
  7. impl AmlContext {
  8. /// This is used by the parser to provide debug comments about the current object, which are indented to the
  9. /// correct level for the current object. We most often need to print these comments from `map_with_context`s,
  10. /// so it's most convenient to have this method on `AmlContext`.
  11. pub(crate) fn comment(&self, verbosity: DebugVerbosity, message: &str) {
  12. if verbosity <= self.debug_verbosity {
  13. log::trace!("{:indent$}{}", "", message, indent = self.scope_indent);
  14. }
  15. }
  16. }
  17. pub type ParseResult<'a, 'c, R> =
  18. Result<(&'a [u8], &'c mut AmlContext, R), (&'a [u8], &'c mut AmlContext, AmlError)>;
  19. pub trait Parser<'a, 'c, R>: Sized
  20. where
  21. 'c: 'a,
  22. {
  23. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R>;
  24. fn map<F, A>(self, map_fn: F) -> Map<'a, 'c, Self, F, R, A>
  25. where
  26. F: Fn(R) -> Result<A, AmlError>,
  27. {
  28. Map { parser: self, map_fn, _phantom: PhantomData }
  29. }
  30. fn map_with_context<F, A>(self, map_fn: F) -> MapWithContext<'a, 'c, Self, F, R, A>
  31. where
  32. F: Fn(R, &'c mut AmlContext) -> (Result<A, AmlError>, &'c mut AmlContext),
  33. {
  34. MapWithContext { parser: self, map_fn, _phantom: PhantomData }
  35. }
  36. fn discard_result(self) -> DiscardResult<'a, 'c, Self, R> {
  37. DiscardResult { parser: self, _phantom: PhantomData }
  38. }
  39. /// Try parsing with `self`. If it succeeds, return its result. If it returns `AmlError::WrongParser`, try
  40. /// parsing with `other`, returning the result of that parser in all cases. Other errors from the first
  41. /// parser are propagated without attempting the second parser. To chain more than two parsers using
  42. /// `or`, see the `choice!` macro.
  43. fn or<OtherParser>(self, other: OtherParser) -> Or<'a, 'c, Self, OtherParser, R>
  44. where
  45. OtherParser: Parser<'a, 'c, R>,
  46. {
  47. Or { p1: self, p2: other, _phantom: PhantomData }
  48. }
  49. fn then<NextParser, NextR>(self, next: NextParser) -> Then<'a, 'c, Self, NextParser, R, NextR>
  50. where
  51. NextParser: Parser<'a, 'c, NextR>,
  52. {
  53. Then { p1: self, p2: next, _phantom: PhantomData }
  54. }
  55. /// `feed` takes a function that takes the result of this parser (`self`) and creates another
  56. /// parser, which is then used to parse the next part of the stream. This sounds convoluted,
  57. /// but is useful for when the next parser's behaviour depends on a property of the result of
  58. /// the first (e.g. the first parser might parse a length `n`, and the second parser then
  59. /// consumes `n` bytes).
  60. fn feed<F, P2, R2>(self, producer_fn: F) -> Feed<'a, 'c, Self, P2, F, R, R2>
  61. where
  62. P2: Parser<'a, 'c, R2>,
  63. F: Fn(R) -> P2,
  64. {
  65. Feed { parser: self, producer_fn, _phantom: PhantomData }
  66. }
  67. }
  68. impl<'a, 'c, F, R> Parser<'a, 'c, R> for F
  69. where
  70. 'c: 'a,
  71. F: Fn(&'a [u8], &'c mut AmlContext) -> ParseResult<'a, 'c, R>,
  72. {
  73. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R> {
  74. self(input, context)
  75. }
  76. }
  77. /// The identity parser - returns the stream and context unchanged. Useful for producing parsers
  78. /// that produce a result without parsing anything by doing: `id().map(|()| Ok(foo))`.
  79. pub fn id<'a, 'c>() -> impl Parser<'a, 'c, ()>
  80. where
  81. 'c: 'a,
  82. {
  83. move |input: &'a [u8], context: &'c mut AmlContext| Ok((input, context, ()))
  84. }
  85. pub fn take<'a, 'c>() -> impl Parser<'a, 'c, u8>
  86. where
  87. 'c: 'a,
  88. {
  89. move |input: &'a [u8], context: &'c mut AmlContext| match input.first() {
  90. Some(&byte) => Ok((&input[1..], context, byte)),
  91. None => Err((input, context, AmlError::UnexpectedEndOfStream)),
  92. }
  93. }
  94. pub fn take_u16<'a, 'c>() -> impl Parser<'a, 'c, u16>
  95. where
  96. 'c: 'a,
  97. {
  98. move |input: &'a [u8], context: &'c mut AmlContext| {
  99. if input.len() < 2 {
  100. return Err((input, context, AmlError::UnexpectedEndOfStream));
  101. }
  102. Ok((&input[2..], context, input[0] as u16 + ((input[1] as u16) << 8)))
  103. }
  104. }
  105. pub fn take_u32<'a, 'c>() -> impl Parser<'a, 'c, u32>
  106. where
  107. 'c: 'a,
  108. {
  109. move |input: &'a [u8], context: &'c mut AmlContext| {
  110. if input.len() < 4 {
  111. return Err((input, context, AmlError::UnexpectedEndOfStream));
  112. }
  113. Ok((
  114. &input[4..],
  115. context,
  116. input[0] as u32 + ((input[1] as u32) << 8) + ((input[2] as u32) << 16) + ((input[3] as u32) << 24),
  117. ))
  118. }
  119. }
  120. pub fn take_u64<'a, 'c>() -> impl Parser<'a, 'c, u64>
  121. where
  122. 'c: 'a,
  123. {
  124. move |input: &'a [u8], context: &'c mut AmlContext| {
  125. if input.len() < 8 {
  126. return Err((input, context, AmlError::UnexpectedEndOfStream));
  127. }
  128. Ok((
  129. &input[8..],
  130. context,
  131. input[0] as u64
  132. + ((input[1] as u64) << 8)
  133. + ((input[2] as u64) << 16)
  134. + ((input[3] as u64) << 24)
  135. + ((input[4] as u64) << 32)
  136. + ((input[5] as u64) << 40)
  137. + ((input[6] as u64) << 48)
  138. + ((input[7] as u64) << 56),
  139. ))
  140. }
  141. }
  142. pub fn take_n<'a, 'c>(n: u32) -> impl Parser<'a, 'c, &'a [u8]>
  143. where
  144. 'c: 'a,
  145. {
  146. move |input: &'a [u8], context| {
  147. if (input.len() as u32) < n {
  148. return Err((input, context, AmlError::UnexpectedEndOfStream));
  149. }
  150. let (result, new_input) = input.split_at(n as usize);
  151. Ok((new_input, context, result))
  152. }
  153. }
  154. pub fn take_to_end_of_pkglength<'a, 'c>(length: PkgLength) -> impl Parser<'a, 'c, &'a [u8]>
  155. where
  156. 'c: 'a,
  157. {
  158. move |input: &'a [u8], context| {
  159. /*
  160. * TODO: fuzzing manages to find PkgLengths that correctly parse during construction, but later crash here.
  161. * I would've thought we would pick up all invalid lengths there, so have a look at why this is needed.
  162. */
  163. let bytes_to_take = match (input.len() as u32).checked_sub(length.end_offset) {
  164. Some(bytes_to_take) => bytes_to_take,
  165. None => return Err((input, context, AmlError::InvalidPkgLength)),
  166. };
  167. take_n(bytes_to_take).parse(input, context)
  168. }
  169. }
  170. pub fn n_of<'a, 'c, P, R>(parser: P, n: usize) -> impl Parser<'a, 'c, Vec<R>>
  171. where
  172. 'c: 'a,
  173. P: Parser<'a, 'c, R>,
  174. {
  175. // TODO: can we write this more nicely?
  176. move |mut input, mut context| {
  177. let mut results = Vec::with_capacity(n);
  178. for _ in 0..n {
  179. let (new_input, new_context, result) = match parser.parse(input, context) {
  180. Ok((input, context, result)) => (input, context, result),
  181. Err((_, context, err)) => return Err((input, context, err)),
  182. };
  183. results.push(result);
  184. input = new_input;
  185. context = new_context;
  186. }
  187. Ok((input, context, results))
  188. }
  189. }
  190. pub fn take_while<'a, 'c, P, R>(parser: P) -> impl Parser<'a, 'c, usize>
  191. where
  192. 'c: 'a,
  193. P: Parser<'a, 'c, R>,
  194. {
  195. move |mut input: &'a [u8], mut context: &'c mut AmlContext| {
  196. let mut num_passed = 0;
  197. loop {
  198. match parser.parse(input, context) {
  199. Ok((new_input, new_context, _)) => {
  200. input = new_input;
  201. context = new_context;
  202. num_passed += 1;
  203. }
  204. Err((_, context, AmlError::WrongParser)) => return Ok((input, context, num_passed)),
  205. Err((_, context, err)) => return Err((input, context, err)),
  206. }
  207. }
  208. }
  209. }
  210. pub fn consume<'a, 'c, F>(condition: F) -> impl Parser<'a, 'c, u8>
  211. where
  212. 'c: 'a,
  213. F: Fn(u8) -> bool,
  214. {
  215. move |input: &'a [u8], context: &'c mut AmlContext| match input.first() {
  216. Some(&byte) if condition(byte) => Ok((&input[1..], context, byte)),
  217. Some(&byte) => Err((input, context, AmlError::UnexpectedByte(byte))),
  218. None => Err((input, context, AmlError::UnexpectedEndOfStream)),
  219. }
  220. }
  221. pub fn comment_scope<'a, 'c, P, R>(
  222. verbosity: DebugVerbosity,
  223. scope_name: &'a str,
  224. parser: P,
  225. ) -> impl Parser<'a, 'c, R>
  226. where
  227. 'c: 'a,
  228. R: core::fmt::Debug,
  229. P: Parser<'a, 'c, R>,
  230. {
  231. move |input, context: &'c mut AmlContext| {
  232. if verbosity <= context.debug_verbosity {
  233. trace!("{:indent$}--> {}", "", scope_name, indent = context.scope_indent);
  234. context.scope_indent += INDENT_PER_SCOPE;
  235. }
  236. // Return if the parse fails, so we don't print the tail. Makes it easier to debug.
  237. let (new_input, context, result) = parser.parse(input, context)?;
  238. if verbosity <= context.debug_verbosity {
  239. context.scope_indent -= INDENT_PER_SCOPE;
  240. trace!("{:indent$}<-- {}", "", scope_name, indent = context.scope_indent);
  241. }
  242. Ok((new_input, context, result))
  243. }
  244. }
  245. pub struct Or<'a, 'c, P1, P2, R>
  246. where
  247. 'c: 'a,
  248. P1: Parser<'a, 'c, R>,
  249. P2: Parser<'a, 'c, R>,
  250. {
  251. p1: P1,
  252. p2: P2,
  253. _phantom: PhantomData<(&'a R, &'c ())>,
  254. }
  255. impl<'a, 'c, P1, P2, R> Parser<'a, 'c, R> for Or<'a, 'c, P1, P2, R>
  256. where
  257. 'c: 'a,
  258. P1: Parser<'a, 'c, R>,
  259. P2: Parser<'a, 'c, R>,
  260. {
  261. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R> {
  262. match self.p1.parse(input, context) {
  263. Ok(parse_result) => Ok(parse_result),
  264. Err((_, context, AmlError::WrongParser)) => self.p2.parse(input, context),
  265. Err((_, context, err)) => Err((input, context, err)),
  266. }
  267. }
  268. }
  269. pub struct Map<'a, 'c, P, F, R, A>
  270. where
  271. 'c: 'a,
  272. P: Parser<'a, 'c, R>,
  273. F: Fn(R) -> Result<A, AmlError>,
  274. {
  275. parser: P,
  276. map_fn: F,
  277. _phantom: PhantomData<(&'a (R, A), &'c ())>,
  278. }
  279. impl<'a, 'c, P, F, R, A> Parser<'a, 'c, A> for Map<'a, 'c, P, F, R, A>
  280. where
  281. 'c: 'a,
  282. P: Parser<'a, 'c, R>,
  283. F: Fn(R) -> Result<A, AmlError>,
  284. {
  285. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, A> {
  286. match self.parser.parse(input, context) {
  287. Ok((new_input, context, result)) => match (self.map_fn)(result) {
  288. Ok(result_value) => Ok((new_input, context, result_value)),
  289. Err(err) => Err((input, context, err)),
  290. },
  291. Err(result) => Err(result),
  292. }
  293. }
  294. }
  295. pub struct MapWithContext<'a, 'c, P, F, R, A>
  296. where
  297. 'c: 'a,
  298. P: Parser<'a, 'c, R>,
  299. F: Fn(R, &'c mut AmlContext) -> (Result<A, AmlError>, &'c mut AmlContext),
  300. {
  301. parser: P,
  302. map_fn: F,
  303. _phantom: PhantomData<(&'a (R, A), &'c ())>,
  304. }
  305. impl<'a, 'c, P, F, R, A> Parser<'a, 'c, A> for MapWithContext<'a, 'c, P, F, R, A>
  306. where
  307. 'c: 'a,
  308. P: Parser<'a, 'c, R>,
  309. F: Fn(R, &'c mut AmlContext) -> (Result<A, AmlError>, &'c mut AmlContext),
  310. {
  311. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, A> {
  312. match self.parser.parse(input, context) {
  313. Ok((new_input, context, result)) => match (self.map_fn)(result, context) {
  314. (Ok(result_value), context) => Ok((new_input, context, result_value)),
  315. (Err(err), context) => Err((input, context, err)),
  316. },
  317. Err(result) => Err(result),
  318. }
  319. }
  320. }
  321. pub struct DiscardResult<'a, 'c, P, R>
  322. where
  323. 'c: 'a,
  324. P: Parser<'a, 'c, R>,
  325. {
  326. parser: P,
  327. _phantom: PhantomData<(&'a R, &'c ())>,
  328. }
  329. impl<'a, 'c, P, R> Parser<'a, 'c, ()> for DiscardResult<'a, 'c, P, R>
  330. where
  331. 'c: 'a,
  332. P: Parser<'a, 'c, R>,
  333. {
  334. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, ()> {
  335. self.parser.parse(input, context).map(|(new_input, new_context, _)| (new_input, new_context, ()))
  336. }
  337. }
  338. pub struct Then<'a, 'c, P1, P2, R1, R2>
  339. where
  340. 'c: 'a,
  341. P1: Parser<'a, 'c, R1>,
  342. P2: Parser<'a, 'c, R2>,
  343. {
  344. p1: P1,
  345. p2: P2,
  346. _phantom: PhantomData<(&'a (R1, R2), &'c ())>,
  347. }
  348. impl<'a, 'c, P1, P2, R1, R2> Parser<'a, 'c, (R1, R2)> for Then<'a, 'c, P1, P2, R1, R2>
  349. where
  350. 'c: 'a,
  351. P1: Parser<'a, 'c, R1>,
  352. P2: Parser<'a, 'c, R2>,
  353. {
  354. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, (R1, R2)> {
  355. self.p1.parse(input, context).and_then(|(next_input, context, result_a)| {
  356. self.p2
  357. .parse(next_input, context)
  358. .map(|(final_input, context, result_b)| (final_input, context, (result_a, result_b)))
  359. })
  360. }
  361. }
  362. pub struct Feed<'a, 'c, P1, P2, F, R1, R2>
  363. where
  364. 'c: 'a,
  365. P1: Parser<'a, 'c, R1>,
  366. P2: Parser<'a, 'c, R2>,
  367. F: Fn(R1) -> P2,
  368. {
  369. parser: P1,
  370. producer_fn: F,
  371. _phantom: PhantomData<(&'a (R1, R2), &'c ())>,
  372. }
  373. impl<'a, 'c, P1, P2, F, R1, R2> Parser<'a, 'c, R2> for Feed<'a, 'c, P1, P2, F, R1, R2>
  374. where
  375. 'c: 'a,
  376. P1: Parser<'a, 'c, R1>,
  377. P2: Parser<'a, 'c, R2>,
  378. F: Fn(R1) -> P2,
  379. {
  380. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R2> {
  381. let (input, context, first_result) = self.parser.parse(input, context)?;
  382. // We can now produce the second parser, and parse using that.
  383. let second_parser = (self.producer_fn)(first_result);
  384. second_parser.parse(input, context)
  385. }
  386. }
  387. /// Takes a number of parsers, and tries to apply each one to the input in order. Returns the
  388. /// result of the first one that succeeds, or fails if all of them fail.
  389. pub(crate) macro choice {
  390. () => {
  391. id().map(|()| Err(AmlError::WrongParser))
  392. },
  393. /*
  394. * The nice way of writing this would generate something like:
  395. * ```
  396. * $first_parser
  397. * $(
  398. * .or($other_parser)
  399. * )*
  400. * .or(id().map(|()| Err(AmlError::WrongParser)))
  401. * ```
  402. * This problem with this is that it generates enormous types that very easily break `rustc`'s type
  403. * limit, so writing large parsers with choice required some gymnastics, which sucks for everyone involved.
  404. *
  405. * Instead, we manually call each parser sequentially, checking its result to see if we should return, or try
  406. * the next parser. This generates worse code at the macro callsite, but is much easier for the compiler to
  407. * type-check (and so reduces the cost of pulling us in as a dependency as well as improving ergonomics).
  408. */
  409. ($($parser: expr),+) => {
  410. move |input, mut context| {
  411. $(
  412. match ($parser).parse(input, context) {
  413. Ok(parse_result) => return Ok(parse_result),
  414. Err((_, new_context, AmlError::WrongParser)) => { context = new_context; },
  415. Err((_, context, err)) => return Err((input, context, err)),
  416. }
  417. )+
  418. Err((input, context, AmlError::WrongParser))
  419. }
  420. }
  421. }
  422. /// This encapsulates an unfortunate hack we sometimes need to use, where the type checker gets
  423. /// caught in an infinite loop of parser types. This occurs when an object can indirectly contain
  424. /// itself, and so the parser type will contain its own type. This works by breaking the cycle of
  425. /// `impl Parser` chains that build up, by effectively creating a "concrete" closure type.
  426. ///
  427. /// You can try using this hack if you are writing a parser and end up with an error of the form:
  428. /// `error[E0275]: overflow evaluating the requirement 'impl Parser<{a type}>'
  429. /// help: consider adding a a '#![recursion_limit="128"] attribute to your crate`
  430. /// Note: Increasing the recursion limit will not fix the issue, as the cycle will just continue
  431. /// until you either hit the new recursion limit or `rustc` overflows its stack.
  432. pub(crate) macro make_parser_concrete($parser: expr) {
  433. |input, context| ($parser).parse(input, context)
  434. }
  435. /// Helper macro for use within `map_with_context` as an alternative to "trying" an expression.
  436. ///
  437. /// ### Example
  438. /// Problem: `expr?` won't work because the expected return type is `(Result<R, AmlError>, &mut AmlContext)`
  439. /// Solution: use `try_with_context!(context, expr)` instead.
  440. pub(crate) macro try_with_context($context: expr, $expr: expr) {
  441. match $expr {
  442. Ok(result) => result,
  443. Err(err) => return (Err(err), $context),
  444. }
  445. }
  #[cfg(test)]
  mod tests {
      use super::*;
      use crate::test_utils::*;

      /// `take_n` fails on streams that are too short and otherwise splits off exactly `n` bytes.
      #[test]
      fn test_take_n() {
          let mut context = make_test_context();

          // Not enough bytes available.
          check_err!(take_n(1).parse(&[], &mut context), AmlError::UnexpectedEndOfStream, &[]);
          check_err!(take_n(2).parse(&[0xf5], &mut context), AmlError::UnexpectedEndOfStream, &[0xf5]);

          // Enough bytes available, with and without leftovers.
          check_ok!(take_n(1).parse(&[0xff], &mut context), &[0xff], &[]);
          check_ok!(take_n(1).parse(&[0xff, 0xf8], &mut context), &[0xff], &[0xf8]);
          check_ok!(take_n(2).parse(&[0xff, 0xf8], &mut context), &[0xff, 0xf8], &[]);
      }

      /// The fixed-width integer parsers decode little-endian values and reject short streams.
      #[test]
      fn test_take_ux() {
          let mut context = make_test_context();

          // 16-bit
          check_err!(take_u16().parse(&[0x34], &mut context), AmlError::UnexpectedEndOfStream, &[0x34]);
          check_ok!(take_u16().parse(&[0x34, 0x12], &mut context), 0x1234, &[]);

          // 32-bit
          check_err!(take_u32().parse(&[0x34, 0x12], &mut context), AmlError::UnexpectedEndOfStream, &[0x34, 0x12]);
          check_ok!(take_u32().parse(&[0x34, 0x12, 0xf4, 0xc3, 0x3e], &mut context), 0xc3f41234, &[0x3e]);

          // 64-bit
          check_err!(take_u64().parse(&[0x34], &mut context), AmlError::UnexpectedEndOfStream, &[0x34]);
          check_ok!(
              take_u64().parse(&[0x34, 0x12, 0x35, 0x76, 0xd4, 0x43, 0xa3, 0xb6, 0xff, 0x00], &mut context),
              0xb6a343d476351234,
              &[0xff, 0x00]
          );
      }
  }