// parser.rs (18 KB)
  1. use crate::{pkg_length::PkgLength, AmlContext, AmlError, AmlValue, DebugVerbosity};
  2. use alloc::vec::Vec;
  3. use core::{convert::TryInto, marker::PhantomData};
  4. use log::trace;
  /// Number of spaces by which parser debug messages are indented for each nested scope.
  pub const INDENT_PER_SCOPE: usize = 2;
  7. impl AmlContext {
  8. /// This is used by the parser to provide debug comments about the current object, which are indented to the
  9. /// correct level for the current object. We most often need to print these comments from `map_with_context`s,
  10. /// so it's most convenient to have this method on `AmlContext`.
  11. pub(crate) fn comment(&self, verbosity: DebugVerbosity, message: &str) {
  12. if verbosity <= self.debug_verbosity {
  13. log::trace!("{:indent$}{}", "", message, indent = self.scope_indent);
  14. }
  15. }
  16. }
  17. #[derive(Debug)]
  18. pub enum Propagate {
  19. Err(AmlError),
  20. Return(AmlValue),
  21. }
  22. impl From<AmlError> for Propagate {
  23. fn from(error: AmlError) -> Self {
  24. Self::Err(error)
  25. }
  26. }
  27. pub type ParseResult<'a, 'c, R> =
  28. Result<(&'a [u8], &'c mut AmlContext, R), (&'a [u8], &'c mut AmlContext, Propagate)>;
  29. pub trait Parser<'a, 'c, R>: Sized
  30. where
  31. 'c: 'a,
  32. {
  33. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R>;
  34. fn map<F, A>(self, map_fn: F) -> Map<'a, 'c, Self, F, R, A>
  35. where
  36. F: Fn(R) -> Result<A, Propagate>,
  37. {
  38. Map { parser: self, map_fn, _phantom: PhantomData }
  39. }
  40. fn map_with_context<F, A>(self, map_fn: F) -> MapWithContext<'a, 'c, Self, F, R, A>
  41. where
  42. F: Fn(R, &'c mut AmlContext) -> (Result<A, Propagate>, &'c mut AmlContext),
  43. {
  44. MapWithContext { parser: self, map_fn, _phantom: PhantomData }
  45. }
  46. fn discard_result(self) -> DiscardResult<'a, 'c, Self, R> {
  47. DiscardResult { parser: self, _phantom: PhantomData }
  48. }
  49. /// Try parsing with `self`. If it succeeds, return its result. If it returns `AmlError::WrongParser`, try
  50. /// parsing with `other`, returning the result of that parser in all cases. Other errors from the first
  51. /// parser are propagated without attempting the second parser. To chain more than two parsers using
  52. /// `or`, see the `choice!` macro.
  53. fn or<OtherParser>(self, other: OtherParser) -> Or<'a, 'c, Self, OtherParser, R>
  54. where
  55. OtherParser: Parser<'a, 'c, R>,
  56. {
  57. Or { p1: self, p2: other, _phantom: PhantomData }
  58. }
  59. fn then<NextParser, NextR>(self, next: NextParser) -> Then<'a, 'c, Self, NextParser, R, NextR>
  60. where
  61. NextParser: Parser<'a, 'c, NextR>,
  62. {
  63. Then { p1: self, p2: next, _phantom: PhantomData }
  64. }
  65. /// `feed` takes a function that takes the result of this parser (`self`) and creates another
  66. /// parser, which is then used to parse the next part of the stream. This sounds convoluted,
  67. /// but is useful for when the next parser's behaviour depends on a property of the result of
  68. /// the first (e.g. the first parser might parse a length `n`, and the second parser then
  69. /// consumes `n` bytes).
  70. fn feed<F, P2, R2>(self, producer_fn: F) -> Feed<'a, 'c, Self, P2, F, R, R2>
  71. where
  72. P2: Parser<'a, 'c, R2>,
  73. F: Fn(R) -> P2,
  74. {
  75. Feed { parser: self, producer_fn, _phantom: PhantomData }
  76. }
  77. }
  78. impl<'a, 'c, F, R> Parser<'a, 'c, R> for F
  79. where
  80. 'c: 'a,
  81. F: Fn(&'a [u8], &'c mut AmlContext) -> ParseResult<'a, 'c, R>,
  82. {
  83. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R> {
  84. self(input, context)
  85. }
  86. }
  87. /// The identity parser - returns the stream and context unchanged. Useful for producing parsers
  88. /// that produce a result without parsing anything by doing: `id().map(|()| Ok(foo))`.
  89. pub fn id<'a, 'c>() -> impl Parser<'a, 'c, ()>
  90. where
  91. 'c: 'a,
  92. {
  93. move |input: &'a [u8], context: &'c mut AmlContext| Ok((input, context, ()))
  94. }
  95. pub fn take<'a, 'c>() -> impl Parser<'a, 'c, u8>
  96. where
  97. 'c: 'a,
  98. {
  99. move |input: &'a [u8], context: &'c mut AmlContext| match input.first() {
  100. Some(&byte) => Ok((&input[1..], context, byte)),
  101. None => Err((input, context, Propagate::Err(AmlError::UnexpectedEndOfStream))),
  102. }
  103. }
  104. pub fn take_u16<'a, 'c>() -> impl Parser<'a, 'c, u16>
  105. where
  106. 'c: 'a,
  107. {
  108. move |input: &'a [u8], context: &'c mut AmlContext| {
  109. if input.len() < 2 {
  110. return Err((input, context, Propagate::Err(AmlError::UnexpectedEndOfStream)));
  111. }
  112. Ok((&input[2..], context, u16::from_le_bytes(input[0..2].try_into().unwrap())))
  113. }
  114. }
  115. pub fn take_u32<'a, 'c>() -> impl Parser<'a, 'c, u32>
  116. where
  117. 'c: 'a,
  118. {
  119. move |input: &'a [u8], context: &'c mut AmlContext| {
  120. if input.len() < 4 {
  121. return Err((input, context, Propagate::Err(AmlError::UnexpectedEndOfStream)));
  122. }
  123. Ok((&input[4..], context, u32::from_le_bytes(input[0..4].try_into().unwrap())))
  124. }
  125. }
  126. pub fn take_u64<'a, 'c>() -> impl Parser<'a, 'c, u64>
  127. where
  128. 'c: 'a,
  129. {
  130. move |input: &'a [u8], context: &'c mut AmlContext| {
  131. if input.len() < 8 {
  132. return Err((input, context, Propagate::Err(AmlError::UnexpectedEndOfStream)));
  133. }
  134. Ok((&input[8..], context, u64::from_le_bytes(input[0..8].try_into().unwrap())))
  135. }
  136. }
  137. pub fn take_n<'a, 'c>(n: u32) -> impl Parser<'a, 'c, &'a [u8]>
  138. where
  139. 'c: 'a,
  140. {
  141. move |input: &'a [u8], context| {
  142. if (input.len() as u32) < n {
  143. return Err((input, context, Propagate::Err(AmlError::UnexpectedEndOfStream)));
  144. }
  145. let (result, new_input) = input.split_at(n as usize);
  146. Ok((new_input, context, result))
  147. }
  148. }
  149. pub fn take_to_end_of_pkglength<'a, 'c>(length: PkgLength) -> impl Parser<'a, 'c, &'a [u8]>
  150. where
  151. 'c: 'a,
  152. {
  153. move |input: &'a [u8], context| {
  154. /*
  155. * TODO: fuzzing manages to find PkgLengths that correctly parse during construction, but later crash here.
  156. * I would've thought we would pick up all invalid lengths there, so have a look at why this is needed.
  157. */
  158. let bytes_to_take = match (input.len() as u32).checked_sub(length.end_offset) {
  159. Some(bytes_to_take) => bytes_to_take,
  160. None => return Err((input, context, Propagate::Err(AmlError::InvalidPkgLength))),
  161. };
  162. take_n(bytes_to_take).parse(input, context)
  163. }
  164. }
  165. pub fn n_of<'a, 'c, P, R>(parser: P, n: usize) -> impl Parser<'a, 'c, Vec<R>>
  166. where
  167. 'c: 'a,
  168. P: Parser<'a, 'c, R>,
  169. {
  170. // TODO: can we write this more nicely?
  171. move |mut input, mut context| {
  172. let mut results = Vec::with_capacity(n);
  173. for _ in 0..n {
  174. let (new_input, new_context, result) = match parser.parse(input, context) {
  175. Ok((input, context, result)) => (input, context, result),
  176. Err((_, context, propagate)) => return Err((input, context, propagate)),
  177. };
  178. results.push(result);
  179. input = new_input;
  180. context = new_context;
  181. }
  182. Ok((input, context, results))
  183. }
  184. }
  185. pub fn take_while<'a, 'c, P, R>(parser: P) -> impl Parser<'a, 'c, usize>
  186. where
  187. 'c: 'a,
  188. P: Parser<'a, 'c, R>,
  189. {
  190. move |mut input: &'a [u8], mut context: &'c mut AmlContext| {
  191. let mut num_passed = 0;
  192. loop {
  193. match parser.parse(input, context) {
  194. Ok((new_input, new_context, _)) => {
  195. input = new_input;
  196. context = new_context;
  197. num_passed += 1;
  198. }
  199. Err((_, context, Propagate::Err(AmlError::WrongParser))) => {
  200. return Ok((input, context, num_passed))
  201. }
  202. Err((_, context, err)) => return Err((input, context, err)),
  203. }
  204. }
  205. }
  206. }
  207. pub fn consume<'a, 'c, F>(condition: F) -> impl Parser<'a, 'c, u8>
  208. where
  209. 'c: 'a,
  210. F: Fn(u8) -> bool,
  211. {
  212. move |input: &'a [u8], context: &'c mut AmlContext| match input.first() {
  213. Some(&byte) if condition(byte) => Ok((&input[1..], context, byte)),
  214. Some(&byte) => Err((input, context, Propagate::Err(AmlError::UnexpectedByte(byte)))),
  215. None => Err((input, context, Propagate::Err(AmlError::UnexpectedEndOfStream))),
  216. }
  217. }
  218. pub fn comment_scope<'a, 'c, P, R>(
  219. verbosity: DebugVerbosity,
  220. scope_name: &'a str,
  221. parser: P,
  222. ) -> impl Parser<'a, 'c, R>
  223. where
  224. 'c: 'a,
  225. R: core::fmt::Debug,
  226. P: Parser<'a, 'c, R>,
  227. {
  228. move |input, context: &'c mut AmlContext| {
  229. if verbosity <= context.debug_verbosity {
  230. trace!("{:indent$}--> {}", "", scope_name, indent = context.scope_indent);
  231. context.scope_indent += INDENT_PER_SCOPE;
  232. }
  233. // Return if the parse fails, so we don't print the tail. Makes it easier to debug.
  234. let (new_input, context, result) = parser.parse(input, context)?;
  235. if verbosity <= context.debug_verbosity {
  236. context.scope_indent -= INDENT_PER_SCOPE;
  237. trace!("{:indent$}<-- {}", "", scope_name, indent = context.scope_indent);
  238. }
  239. Ok((new_input, context, result))
  240. }
  241. }
  242. pub struct Or<'a, 'c, P1, P2, R>
  243. where
  244. 'c: 'a,
  245. P1: Parser<'a, 'c, R>,
  246. P2: Parser<'a, 'c, R>,
  247. {
  248. p1: P1,
  249. p2: P2,
  250. _phantom: PhantomData<(&'a R, &'c ())>,
  251. }
  252. impl<'a, 'c, P1, P2, R> Parser<'a, 'c, R> for Or<'a, 'c, P1, P2, R>
  253. where
  254. 'c: 'a,
  255. P1: Parser<'a, 'c, R>,
  256. P2: Parser<'a, 'c, R>,
  257. {
  258. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R> {
  259. match self.p1.parse(input, context) {
  260. Ok(parse_result) => Ok(parse_result),
  261. Err((_, context, Propagate::Err(AmlError::WrongParser))) => self.p2.parse(input, context),
  262. Err((_, context, err)) => Err((input, context, err)),
  263. }
  264. }
  265. }
  266. pub struct Map<'a, 'c, P, F, R, A>
  267. where
  268. 'c: 'a,
  269. P: Parser<'a, 'c, R>,
  270. F: Fn(R) -> Result<A, Propagate>,
  271. {
  272. parser: P,
  273. map_fn: F,
  274. _phantom: PhantomData<(&'a (R, A), &'c ())>,
  275. }
  276. impl<'a, 'c, P, F, R, A> Parser<'a, 'c, A> for Map<'a, 'c, P, F, R, A>
  277. where
  278. 'c: 'a,
  279. P: Parser<'a, 'c, R>,
  280. F: Fn(R) -> Result<A, Propagate>,
  281. {
  282. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, A> {
  283. match self.parser.parse(input, context) {
  284. Ok((new_input, context, result)) => match (self.map_fn)(result) {
  285. Ok(result_value) => Ok((new_input, context, result_value)),
  286. Err(err) => Err((input, context, err)),
  287. },
  288. Err(result) => Err(result),
  289. }
  290. }
  291. }
  292. pub struct MapWithContext<'a, 'c, P, F, R, A>
  293. where
  294. 'c: 'a,
  295. P: Parser<'a, 'c, R>,
  296. F: Fn(R, &'c mut AmlContext) -> (Result<A, Propagate>, &'c mut AmlContext),
  297. {
  298. parser: P,
  299. map_fn: F,
  300. _phantom: PhantomData<(&'a (R, A), &'c ())>,
  301. }
  302. impl<'a, 'c, P, F, R, A> Parser<'a, 'c, A> for MapWithContext<'a, 'c, P, F, R, A>
  303. where
  304. 'c: 'a,
  305. P: Parser<'a, 'c, R>,
  306. F: Fn(R, &'c mut AmlContext) -> (Result<A, Propagate>, &'c mut AmlContext),
  307. {
  308. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, A> {
  309. match self.parser.parse(input, context) {
  310. Ok((new_input, context, result)) => match (self.map_fn)(result, context) {
  311. (Ok(result_value), context) => Ok((new_input, context, result_value)),
  312. (Err(err), context) => Err((input, context, err)),
  313. },
  314. Err(result) => Err(result),
  315. }
  316. }
  317. }
  318. pub struct DiscardResult<'a, 'c, P, R>
  319. where
  320. 'c: 'a,
  321. P: Parser<'a, 'c, R>,
  322. {
  323. parser: P,
  324. _phantom: PhantomData<(&'a R, &'c ())>,
  325. }
  326. impl<'a, 'c, P, R> Parser<'a, 'c, ()> for DiscardResult<'a, 'c, P, R>
  327. where
  328. 'c: 'a,
  329. P: Parser<'a, 'c, R>,
  330. {
  331. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, ()> {
  332. self.parser.parse(input, context).map(|(new_input, new_context, _)| (new_input, new_context, ()))
  333. }
  334. }
  335. pub struct Then<'a, 'c, P1, P2, R1, R2>
  336. where
  337. 'c: 'a,
  338. P1: Parser<'a, 'c, R1>,
  339. P2: Parser<'a, 'c, R2>,
  340. {
  341. p1: P1,
  342. p2: P2,
  343. _phantom: PhantomData<(&'a (R1, R2), &'c ())>,
  344. }
  345. impl<'a, 'c, P1, P2, R1, R2> Parser<'a, 'c, (R1, R2)> for Then<'a, 'c, P1, P2, R1, R2>
  346. where
  347. 'c: 'a,
  348. P1: Parser<'a, 'c, R1>,
  349. P2: Parser<'a, 'c, R2>,
  350. {
  351. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, (R1, R2)> {
  352. self.p1.parse(input, context).and_then(|(next_input, context, result_a)| {
  353. self.p2
  354. .parse(next_input, context)
  355. .map(|(final_input, context, result_b)| (final_input, context, (result_a, result_b)))
  356. })
  357. }
  358. }
  359. pub struct Feed<'a, 'c, P1, P2, F, R1, R2>
  360. where
  361. 'c: 'a,
  362. P1: Parser<'a, 'c, R1>,
  363. P2: Parser<'a, 'c, R2>,
  364. F: Fn(R1) -> P2,
  365. {
  366. parser: P1,
  367. producer_fn: F,
  368. _phantom: PhantomData<(&'a (R1, R2), &'c ())>,
  369. }
  370. impl<'a, 'c, P1, P2, F, R1, R2> Parser<'a, 'c, R2> for Feed<'a, 'c, P1, P2, F, R1, R2>
  371. where
  372. 'c: 'a,
  373. P1: Parser<'a, 'c, R1>,
  374. P2: Parser<'a, 'c, R2>,
  375. F: Fn(R1) -> P2,
  376. {
  377. fn parse(&self, input: &'a [u8], context: &'c mut AmlContext) -> ParseResult<'a, 'c, R2> {
  378. let (input, context, first_result) = self.parser.parse(input, context)?;
  379. // We can now produce the second parser, and parse using that.
  380. let second_parser = (self.producer_fn)(first_result);
  381. second_parser.parse(input, context)
  382. }
  383. }
  384. /// Takes a number of parsers, and tries to apply each one to the input in order. Returns the
  385. /// result of the first one that succeeds, or fails if all of them fail.
  386. pub(crate) macro choice {
  387. () => {
  388. id().map(|()| Err(AmlError::WrongParser))
  389. },
  390. /*
  391. * The nice way of writing this would be something like:
  392. * ```
  393. * $first_parser
  394. * $(
  395. * .or($other_parser)
  396. * )*
  397. * .or(id().map(|()| Err(AmlError::WrongParser)))
  398. * ```
  399. * This problem with this is that it generates enormous types that very easily break `rustc`'s type
  400. * limit, so writing large parsers with choice required some gymnastics, which sucks for everyone involved.
  401. *
  402. * Instead, we manually call each parser sequentially, checking its result to see if we should return, or try
  403. * the next parser. This generates worse code at the macro callsite, but is much easier for the compiler to
  404. * type-check (and so reduces the cost of pulling us in as a dependency as well as improving ergonomics).
  405. */
  406. ($($parser: expr),+) => {
  407. move |input, context| {
  408. $(
  409. let context = match ($parser).parse(input, context) {
  410. Ok(parse_result) => return Ok(parse_result),
  411. Err((_, new_context, Propagate::Err(AmlError::WrongParser))) => new_context,
  412. Err((_, context, propagate)) => return Err((input, context, propagate)),
  413. };
  414. )+
  415. Err((input, context, Propagate::Err(AmlError::WrongParser)))
  416. }
  417. }
  418. }
  419. /// This encapsulates an unfortunate hack we sometimes need to use, where the type checker gets
  420. /// caught in an infinite loop of parser types. This occurs when an object can indirectly contain
  421. /// itself, and so the parser type will contain its own type. This works by breaking the cycle of
  422. /// `impl Parser` chains that build up, by effectively creating a "concrete" closure type.
  423. ///
  424. /// You can try using this hack if you are writing a parser and end up with an error of the form:
  425. /// `error[E0275]: overflow evaluating the requirement 'impl Parser<{a type}>'
  426. /// help: consider adding a a '#![recursion_limit="128"] attribute to your crate`
  427. /// Note: Increasing the recursion limit will not fix the issue, as the cycle will just continue
  428. /// until you either hit the new recursion limit or `rustc` overflows its stack.
  429. pub(crate) macro make_parser_concrete($parser: expr) {
  430. |input, context| ($parser).parse(input, context)
  431. }
  432. /// Helper macro for use within `map_with_context` as an alternative to "trying" an expression.
  433. ///
  434. /// ### Example
  435. /// Problem: `expr?` won't work because the expected return type is `(Result<R, AmlError>, &mut AmlContext)`
  436. /// Solution: use `try_with_context!(context, expr)` instead.
  437. pub(crate) macro try_with_context($context: expr, $expr: expr) {
  438. match $expr {
  439. Ok(result) => result,
  440. Err(err) => return (Err(Propagate::Err(err)), $context),
  441. }
  442. }
  #[cfg(test)]
  mod tests {
      use super::*;
      use crate::test_utils::*;

      /// `take_n` must fail without consuming anything when the stream is too short, and otherwise
      /// split the stream after exactly `n` bytes.
      #[test]
      fn test_take_n() {
          let mut ctx = make_test_context();

          // Not enough bytes available.
          check_err!(take_n(1).parse(&[], &mut ctx), AmlError::UnexpectedEndOfStream, &[]);
          check_err!(take_n(2).parse(&[0xf5], &mut ctx), AmlError::UnexpectedEndOfStream, &[0xf5]);

          // Enough bytes available: check both the taken bytes and what is left over.
          check_ok!(take_n(1).parse(&[0xff], &mut ctx), &[0xff], &[]);
          check_ok!(take_n(1).parse(&[0xff, 0xf8], &mut ctx), &[0xff], &[0xf8]);
          check_ok!(take_n(2).parse(&[0xff, 0xf8], &mut ctx), &[0xff, 0xf8], &[]);
      }

      /// The fixed-width integer parsers must reject short streams and decode little-endian values.
      #[test]
      fn test_take_ux() {
          let mut ctx = make_test_context();

          // 16-bit
          check_err!(take_u16().parse(&[0x34], &mut ctx), AmlError::UnexpectedEndOfStream, &[0x34]);
          check_ok!(take_u16().parse(&[0x34, 0x12], &mut ctx), 0x1234, &[]);

          // 32-bit
          check_err!(take_u32().parse(&[0x34, 0x12], &mut ctx), AmlError::UnexpectedEndOfStream, &[0x34, 0x12]);
          check_ok!(take_u32().parse(&[0x34, 0x12, 0xf4, 0xc3, 0x3e], &mut ctx), 0xc3f41234, &[0x3e]);

          // 64-bit
          check_err!(take_u64().parse(&[0x34], &mut ctx), AmlError::UnexpectedEndOfStream, &[0x34]);
          check_ok!(
              take_u64().parse(&[0x34, 0x12, 0x35, 0x76, 0xd4, 0x43, 0xa3, 0xb6, 0xff, 0x00], &mut ctx),
              0xb6a343d476351234,
              &[0xff, 0x00]
          );
      }
  }