ignore_block.rs 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. use pulldown_cmark::{Event, Parser, Tag};
  2. use std::ops::Range;
  /// Byte ranges of a Markdown text that should be skipped.
  ///
  /// Built by `IgnoreBlocks::new`, which records code blocks, block quotes and
  /// inline code spans as reported by the Markdown parser.
  #[derive(Debug)]
  pub struct IgnoreBlocks {
      /// Recorded ranges, in the order the parser reported them. Each range
      /// indexes into the string that was handed to `IgnoreBlocks::new`.
      ignore: Vec<Range<usize>>,
  }
  7. impl IgnoreBlocks {
  8. pub fn new(s: &str) -> IgnoreBlocks {
  9. let mut ignore = Vec::new();
  10. let mut parser = Parser::new(s).into_offset_iter();
  11. while let Some((event, range)) = parser.next() {
  12. if let Event::Start(Tag::CodeBlock(_)) = event {
  13. let start = range.start;
  14. while let Some((event, range)) = parser.next() {
  15. if let Event::End(Tag::CodeBlock(_)) = event {
  16. ignore.push(start..range.end);
  17. break;
  18. }
  19. }
  20. } else if let Event::Start(Tag::BlockQuote) = event {
  21. let start = range.start;
  22. let mut count = 1;
  23. while let Some((event, range)) = parser.next() {
  24. if let Event::Start(Tag::BlockQuote) = event {
  25. count += 1;
  26. } else if let Event::End(Tag::BlockQuote) = event {
  27. count -= 1;
  28. if count == 0 {
  29. ignore.push(start..range.end);
  30. break;
  31. }
  32. }
  33. }
  34. } else if let Event::Code(_) = event {
  35. ignore.push(range);
  36. }
  37. }
  38. IgnoreBlocks { ignore }
  39. }
  40. pub fn overlaps_ignore(&self, region: Range<usize>) -> Option<Range<usize>> {
  41. for ignore in &self.ignore {
  42. // See https://stackoverflow.com/questions/3269434.
  43. if ignore.start <= region.end && region.start <= ignore.end {
  44. return Some(ignore.clone());
  45. }
  46. }
  47. None
  48. }
  49. }
  /// Test-only tag for a slice of the input text: either inside an ignored
  /// range or outside of every ignored range.
  #[cfg(test)]
  #[derive(Debug, PartialEq, Eq)]
  enum Ignore<'a> {
      /// The slice lies inside an ignored range.
      Yes(&'a str),
      /// The slice lies outside of every ignored range.
      No(&'a str),
  }
  /// Test helper: cuts `s` into consecutive slices and tags each one as ignored
  /// (`Ignore::Yes`) or not ignored (`Ignore::No`), based on the ranges found by
  /// `IgnoreBlocks::new`.
  ///
  /// Empty slices between or around the ranges are omitted. Text that follows
  /// the last range is emitted as a final `Ignore::No`; when no range was found
  /// at all, the whole non-empty input is returned as a single `Ignore::No`.
  ///
  /// The slicing relies on the ranges being in ascending, non-overlapping
  /// order; otherwise the string indexing panics.
  #[cfg(test)]
  fn bodies(s: &str) -> Vec<Ignore<'_>> {
      let blocks = IgnoreBlocks::new(s);
      let mut pieces = Vec::new();
      // Offset just past the text that has already been emitted.
      let mut cursor = 0;
      for range in &blocks.ignore {
          if cursor != range.start {
              pieces.push(Ignore::No(&s[cursor..range.start]));
          }
          pieces.push(Ignore::Yes(&s[range.clone()]));
          cursor = range.end;
      }
      // Checking the cursor rather than the last range also covers inputs
      // without any ignored range.
      if cursor != s.len() {
          pieces.push(Ignore::No(&s[cursor..]));
      }
      pieces
  }
  // Inline code at the very start, directly followed by plain text.
  #[test]
  fn cbs_1() {
      let pieces = bodies("`hey you`bar me too");
      let expected = [Ignore::Yes("`hey you`"), Ignore::No("bar me too")];
      assert_eq!(pieces, expected);
  }
  // Inline code followed by inline HTML; only the code span is ignored.
  #[test]
  fn cbs_2() {
      let pieces = bodies("`hey you` <b>me too</b>");
      let expected = [Ignore::Yes("`hey you`"), Ignore::No(" <b>me too</b>")];
      assert_eq!(pieces, expected);
  }
  // A backslash before the closing backtick does not keep the span open, and
  // the lone backtick later in the text does not start a new span.
  #[test]
  fn cbs_3() {
      let pieces = bodies(r"`hey you\` <b>`me too</b>");
      let expected = [Ignore::Yes(r"`hey you\`"), Ignore::No(" <b>`me too</b>")];
      assert_eq!(pieces, expected);
  }
  // Fenced code block with a language tag, followed by a blank line and text.
  //
  // The input is written with explicit escapes so the blank line after the
  // closing fence cannot be lost; the expected pieces concatenate back to
  // exactly this input.
  #[test]
  fn cbs_4() {
      let pieces = bodies("\n```language_spec\ntesting\n```\n\nnope\n");
      let expected = [
          Ignore::No("\n"),
          Ignore::Yes("```language_spec\ntesting\n```"),
          Ignore::No("\n\nnope\n"),
      ];
      assert_eq!(pieces, expected);
  }
  // Fenced code block whose info string follows a space and whose closing
  // fence is followed by trailing whitespace; the whitespace belongs to the
  // ignored range.
  #[test]
  fn cbs_5() {
      // `concat!` keeps the trailing whitespace visible as its own literal.
      let input = concat!("\n``` tag_after_space\ntesting\n```", " ");
      let expected = [
          Ignore::No("\n"),
          Ignore::Yes("``` tag_after_space\ntesting\n``` "),
      ];
      assert_eq!(bodies(input), expected);
  }
  // Indented code block.
  //
  // Markdown only treats lines indented by at least four spaces as an indented
  // code block, so the indentation is spelled out with escapes. The leading
  // indentation of the first line stays outside the ignored range, while the
  // indentation of the following line lies inside it.
  #[test]
  fn cbs_6() {
      let pieces = bodies("\n    this is indented\n    this is indented too\n");
      let expected = [
          Ignore::No("\n    "),
          Ignore::Yes("this is indented\n    this is indented too\n"),
      ];
      assert_eq!(pieces, expected);
  }
  // A fenced code block that is never closed runs to the end of the input.
  #[test]
  fn cbs_7() {
      let pieces = bodies("\n```\ntesting unclosed\n");
      let expected = [Ignore::No("\n"), Ignore::Yes("```\ntesting unclosed\n")];
      assert_eq!(pieces, expected);
  }
  // Two inline code spans with plain text in between.
  #[test]
  fn cbs_8() {
      let pieces = bodies("`one` not `two`");
      let expected = [
          Ignore::Yes("`one`"),
          Ignore::No(" not "),
          Ignore::Yes("`two`"),
      ];
      assert_eq!(pieces, expected);
  }
  // Block quote with a lazily continued second line, surrounded by text.
  //
  // The blank line after the quote is required: without it "more text" would
  // be another continuation line of the quote. The input uses explicit escapes
  // so that blank line cannot be lost; the expected pieces concatenate back to
  // exactly this input.
  #[test]
  fn cbs_9() {
      let pieces =
          bodies("\nsome text\n> testing citations\nstill in citation\n\nmore text\n");
      let expected = [
          Ignore::No("\nsome text\n"),
          Ignore::Yes("> testing citations\nstill in citation\n"),
          Ignore::No("\nmore text\n"),
      ];
      assert_eq!(pieces, expected);
  }
  // Multi-line block quote between a heading and a paragraph.
  //
  // The blank lines after the heading and after the quote are part of the
  // input; they are written as escapes so they cannot be lost. The expected
  // pieces concatenate back to exactly this input.
  #[test]
  fn cbs_10() {
      let pieces = bodies("\n# abc\n\n> multiline\n> citation\n\nlorem ipsum\n");
      let expected = [
          Ignore::No("\n# abc\n\n"),
          Ignore::Yes("> multiline\n> citation\n"),
          Ignore::No("\nlorem ipsum\n"),
      ];
      assert_eq!(pieces, expected);
  }
  // Nested block quotes are reported as one ignored range covering the
  // outermost quote.
  #[test]
  fn cbs_11() {
      let pieces = bodies("\n> some\n> > nested\n> citations\n");
      let expected = [
          Ignore::No("\n"),
          Ignore::Yes("> some\n> > nested\n> citations\n"),
      ];
      assert_eq!(pieces, expected);
  }