
Implement label parser with new framework

Mark Rousskov, 6 years ago
parent commit cb57263c0b

+ 35 - 8
parser/src/command.rs

@@ -1,27 +1,54 @@
-use crate::token::{Error, Token, Tokenizer};
+use crate::error::Error;
+use crate::token::{Token, Tokenizer};
+
+pub mod label;
 
 pub fn find_commmand_start(input: &str, bot: &str) -> Option<usize> {
     input.find(&format!("@{}", bot))
 }
 
 #[derive(Debug)]
-pub enum Command {
-    Label(label::LabelCommand),
+pub enum Command<'a> {
+    Label(label::LabelCommand<'a>),
 }
 
-pub fn parse_command<'a>(input: &'a str, bot: &str) -> Result<Option<Command>, Error<'a>> {
+pub fn parse_command<'a>(input: &mut &'a str, bot: &str) -> Result<Option<Command<'a>>, Error<'a>> {
     let start = match find_commmand_start(input, bot) {
         Some(pos) => pos,
         None => return Ok(None),
     };
-    let input = &input[start..];
-    let mut tok = Tokenizer::new(input);
+    *input = &input[start..];
+    let mut tok = Tokenizer::new(&input);
     assert_eq!(
         tok.next_token().unwrap(),
         Some(Token::Word(&format!("@{}", bot)))
     );
 
-    let cmd = Command::Label;
+    let mut success = vec![];
+
+    {
+        let mut lc = tok.clone();
+        let res = label::LabelCommand::parse(&mut lc)?;
+        match res {
+            None => {}
+            Some(cmd) => {
+                // save tokenizer off
+                tok = lc;
+                success.push(Command::Label(cmd));
+            }
+        }
+    }
+
+    if success.len() > 1 {
+        panic!(
+            "succeeded parsing {:?} to multiple commands: {:?}",
+            input, success
+        );
+    }
+
+    // XXX: Check that command did not intersect with code block
+
+    *input = &input[tok.position()..];
 
-    Ok(Some(cmd))
+    Ok(success.pop())
 }

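For context, here is a minimal sketch of how a caller might drive the reworked `parse_command`. The crate path `parser::command`, the `handle_comment` helper, and the bot name `"rustbot"` are illustrative assumptions, not part of this commit:

```rust
// Hypothetical caller; crate path, helper name, and bot name are assumptions.
use parser::command::{parse_command, Command};

fn handle_comment(body: &str) {
    // The new signature takes `&mut &str`, so the caller can keep scanning
    // the remainder of the comment after each command is consumed.
    let mut rest: &str = body;
    match parse_command(&mut rest, "rustbot") {
        Ok(Some(Command::Label(cmd))) => {
            // `rest` now points just past the parsed label command.
            println!("label command: {:?}, remaining: {:?}", cmd, rest);
        }
        Ok(None) => println!("no command found"),
        Err(err) => println!("parse error: {}", err),
    }
}
```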
+ 225 - 0
parser/src/command/label.rs

@@ -0,0 +1,225 @@
+//! The labels command parser.
+//!
+//! This can parse arbitrary input, giving the list of labels added/removed.
+//!
+//! The grammar is as follows:
+//!
+//! ```text
+//! Command: `@bot modify labels:? to? <label-list>.`
+//!
+//! <label-list>:
+//!  - <label-delta>
+//!  - <label-delta> and <label-list>
+//!  - <label-delta>, <label-list>
+//!  - <label-delta>, and <label-list>
+//!
+//! <label-delta>:
+//!  - +<label>
+//!  - -<label>
+//!  this can start with a + or -, but then the only supported way of adding it
+//!  is with the previous two variants of this (i.e., ++label and -+label).
+//!  - <label>
+//!
+//! <label>: \S+
+//! ```
+
+use crate::error::Error;
+use crate::token::{Token, Tokenizer};
+#[cfg(test)]
+use std::error::Error as _;
+use std::fmt;
+
+#[derive(Debug)]
+pub struct LabelCommand<'a>(Vec<LabelDelta<'a>>);
+
+#[derive(Debug, PartialEq, Eq)]
+pub enum LabelDelta<'a> {
+    Add(Label<'a>),
+    Remove(Label<'a>),
+}
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub struct Label<'a>(&'a str);
+
+#[derive(PartialEq, Eq, Debug)]
+pub enum ParseError {
+    EmptyLabel,
+    ExpectedLabelDelta,
+    MisleadingTo,
+    NoSeparator,
+}
+
+impl std::error::Error for ParseError {}
+
+impl fmt::Display for ParseError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            ParseError::EmptyLabel => write!(f, "empty label"),
+            ParseError::ExpectedLabelDelta => write!(f, "a label delta"),
+            ParseError::MisleadingTo => write!(f, "forbidden to, use +to"),
+            ParseError::NoSeparator => write!(f, "must have : or to as label starter"),
+        }
+    }
+}
+
+impl<'a> Label<'a> {
+    fn parse(input: &'a str) -> Result<Label<'a>, ParseError> {
+        if input.is_empty() {
+            Err(ParseError::EmptyLabel)
+        } else {
+            Ok(Label(input))
+        }
+    }
+
+    pub fn as_str(&self) -> &'a str {
+        self.0
+    }
+}
+
+impl<'a> std::ops::Deref for Label<'a> {
+    type Target = str;
+    fn deref(&self) -> &str {
+        self.0
+    }
+}
+
+impl<'a> LabelDelta<'a> {
+    fn parse(input: &mut Tokenizer<'a>) -> Result<LabelDelta<'a>, Error<'a>> {
+        let delta = match input.peek_token()? {
+            Some(Token::Word(delta)) => {
+                input.next_token()?;
+                delta
+            }
+            _ => {
+                return Err(input.error(ParseError::ExpectedLabelDelta));
+            }
+        };
+        if delta.starts_with('+') {
+            Ok(LabelDelta::Add(
+                Label::parse(&delta[1..]).map_err(|e| input.error(e))?,
+            ))
+        } else if delta.starts_with('-') {
+            Ok(LabelDelta::Remove(
+                Label::parse(&delta[1..]).map_err(|e| input.error(e))?,
+            ))
+        } else {
+            Ok(LabelDelta::Add(
+                Label::parse(delta).map_err(|e| input.error(e))?,
+            ))
+        }
+    }
+}
+
+#[test]
+fn delta_empty() {
+    let mut tok = Tokenizer::new("+ testing");
+    let err = LabelDelta::parse(&mut tok).unwrap_err();
+    assert_eq!(
+        err.source().unwrap().downcast_ref::<ParseError>(),
+        Some(&ParseError::EmptyLabel)
+    );
+    assert_eq!(err.position(), 1);
+}
+
+impl<'a> LabelCommand<'a> {
+    pub fn parse(input: &mut Tokenizer<'a>) -> Result<Option<Self>, Error<'a>> {
+        let mut toks = input.clone();
+        if let Some(Token::Word("modify")) = toks.next_token()? {
+            // continue
+        } else {
+            return Ok(None);
+        }
+        if let Some(Token::Word("labels")) = toks.next_token()? {
+            // continue
+        } else {
+            return Ok(None);
+        }
+        if let Some(Token::Colon) = toks.peek_token()? {
+            toks.next_token()?;
+        } else if let Some(Token::Word("to")) = toks.peek_token()? {
+            toks.next_token()?;
+        } else {
+            return Err(toks.error(ParseError::NoSeparator));
+        }
+        if let Some(Token::Word("to")) = toks.peek_token()? {
+            return Err(toks.error(ParseError::MisleadingTo));
+        }
+        // start parsing deltas
+        let mut deltas = Vec::new();
+        loop {
+            deltas.push(LabelDelta::parse(&mut toks)?);
+
+            // optional `, and` separator
+            if let Some(Token::Comma) = toks.peek_token()? {
+                toks.next_token()?;
+            }
+            if let Some(Token::Word("and")) = toks.peek_token()? {
+                toks.next_token()?;
+            }
+
+            if let Some(Token::Dot) = toks.peek_token()? {
+                toks.next_token()?;
+                *input = toks;
+                return Ok(Some(LabelCommand(deltas)));
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+fn parse<'a>(input: &'a str) -> Result<Option<Vec<LabelDelta<'a>>>, Error<'a>> {
+    let mut toks = Tokenizer::new(input);
+    Ok(LabelCommand::parse(&mut toks)?.map(|c| c.0))
+}
+
+#[test]
+fn parse_simple() {
+    assert_eq!(
+        parse("modify labels: +T-compiler -T-lang bug."),
+        Ok(Some(vec![
+            LabelDelta::Add(Label("T-compiler")),
+            LabelDelta::Remove(Label("T-lang")),
+            LabelDelta::Add(Label("bug")),
+        ]))
+    );
+}
+
+#[test]
+fn parse_leading_to_label() {
+    assert_eq!(
+        parse("modify labels: to -T-lang")
+            .unwrap_err()
+            .source()
+            .unwrap()
+            .downcast_ref(),
+        Some(&ParseError::MisleadingTo)
+    );
+}
+
+#[test]
+fn parse_no_label_paragraph() {
+    assert_eq!(
+        parse("modify labels yep; Labels do in fact exist but this is not a label paragraph.")
+            .unwrap_err()
+            .source()
+            .unwrap()
+            .downcast_ref(),
+        Some(&ParseError::NoSeparator)
+    );
+    assert_eq!(
+        parse("Labels do in fact exist but this is not a label paragraph."),
+        Ok(None),
+    );
+}
+
+#[test]
+fn parse_no_end() {
+    assert_eq!(
+        parse("modify labels to +T-compiler -T-lang bug")
+            .unwrap_err()
+            .source()
+            .unwrap()
+            .downcast_ref(),
+        Some(&ParseError::ExpectedLabelDelta),
+    );
+}

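The interesting part of the parser above is the speculative-parse pattern: `LabelCommand::parse` works on a clone of the tokenizer and only writes that clone back once the whole command (through the trailing `.`) is accepted, so a failed attempt leaves the caller's tokenizer untouched. A small sketch of that contract (the crate path `parser` and the `try_label` helper are assumptions):

```rust
use parser::command::label::LabelCommand;
use parser::token::Tokenizer;

fn try_label(input: &str) {
    let mut tok = Tokenizer::new(input);
    match LabelCommand::parse(&mut tok) {
        // Only in this arm has `tok` advanced past the final `.`.
        Ok(Some(cmd)) => println!("parsed: {:?}", cmd),
        // Not a label paragraph at all; `tok` is unchanged.
        Ok(None) => println!("not a label command"),
        // Looked like a label command but was malformed; `tok` is unchanged.
        Err(err) => println!("error: {}", err),
    }
}
```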
+ 41 - 0
parser/src/error.rs

@@ -0,0 +1,41 @@
+use std::error;
+use std::fmt;
+
+#[derive(Debug)]
+pub struct Error<'a> {
+    pub input: &'a str,
+    pub position: usize,
+    pub source: Box<dyn error::Error>,
+}
+
+impl<'a> PartialEq for Error<'a> {
+    fn eq(&self, other: &Self) -> bool {
+        self.input == other.input && self.position == other.position
+    }
+}
+
+impl<'a> error::Error for Error<'a> {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        Some(&*self.source)
+    }
+}
+
+impl<'a> Error<'a> {
+    pub fn position(&self) -> usize {
+        self.position
+    }
+}
+
+impl<'a> fmt::Display for Error<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let space = 10;
+        let end = std::cmp::min(self.input.len(), self.position + space);
+        write!(
+            f,
+            "...{}|error: {} at >|{}...",
+            &self.input[self.position.saturating_sub(space)..self.position],
+            self.source,
+            &self.input[self.position..end],
+        )
+    }
+}

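The `Display` impl above renders a small window of input around the failure position. A hedged sketch of what that looks like (the input string and position are illustrative, and in real use the error comes from `Tokenizer::error` rather than being built by hand):

```rust
use parser::command::label::ParseError;
use parser::error::Error;

fn main() {
    let input = "modify labels: to -T-lang";
    let err = Error {
        input,
        position: 15, // index of the offending `to` token
        source: Box::new(ParseError::MisleadingTo),
    };
    // Prints: ...y labels: |error: forbidden to, use +to at >|to -T-lang...
    println!("{}", err);
}
```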
+ 1 - 2
parser/src/lib.rs

@@ -1,5 +1,4 @@
 pub mod code_block;
 pub mod command;
-//pub mod label;
-pub mod label {}
+pub mod error;
 pub mod token;

+ 24 - 28
parser/src/token.rs

@@ -1,3 +1,4 @@
+use crate::error::Error;
 use std::fmt;
 use std::iter::Peekable;
 use std::str::CharIndices;
@@ -14,19 +15,27 @@ pub enum Token<'a> {
     Word(&'a str),
 }
 
+impl fmt::Display for Token<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Token::Dot => write!(f, "."),
+            Token::Comma => write!(f, ","),
+            Token::Semi => write!(f, ";"),
+            Token::Exclamation => write!(f, "!"),
+            Token::Question => write!(f, "?"),
+            Token::Colon => write!(f, ":"),
+            Token::Quote(body) => write!(f, r#""{}""#, body),
+            Token::Word(word) => write!(f, "{}", word),
+        }
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct Tokenizer<'a> {
     input: &'a str,
     chars: Peekable<CharIndices<'a>>,
 }
 
-#[derive(Debug, Copy, Clone)]
-pub struct Error<'a> {
-    input: &'a str,
-    position: usize,
-    kind: ErrorKind,
-}
-
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum ErrorKind {
     UnterminatedString,
@@ -34,6 +43,8 @@ pub enum ErrorKind {
     RawString,
 }
 
+impl std::error::Error for ErrorKind {}
+
 impl fmt::Display for ErrorKind {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(
@@ -48,27 +59,12 @@ impl fmt::Display for ErrorKind {
     }
 }
 
+#[cfg(test)]
 impl<'a> Error<'a> {
-    pub fn kind(&self) -> ErrorKind {
-        self.kind
-    }
-
-    #[cfg(test)]
     fn position_and_kind(&self) -> (usize, ErrorKind) {
-        (self.position, self.kind)
-    }
-}
-
-impl<'a> fmt::Display for Error<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let space = 10;
-        let end = std::cmp::min(self.input.len(), self.position + space);
-        write!(
-            f,
-            "...{}|error: {} at >|{}...",
-            &self.input[self.position.saturating_sub(space)..self.position],
-            self.kind,
-            &self.input[self.position..end],
+        (
+            self.position,
+            *self.source.downcast_ref::<ErrorKind>().unwrap(),
         )
     }
 }
@@ -81,11 +77,11 @@ impl<'a> Tokenizer<'a> {
         }
     }
 
-    fn error(&mut self, kind: ErrorKind) -> Error<'a> {
+    pub fn error<T: 'static + std::error::Error>(&mut self, source: T) -> Error<'a> {
         Error {
             input: self.input,
             position: self.cur_pos(),
-            kind,
+            source: Box::new(source),
         }
     }
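With `Tokenizer::error` now generic over any `std::error::Error`, both lexer-level `ErrorKind` values and command-level `ParseError` values travel inside the same `Error<'a>` and are recovered with `source()` plus `downcast_ref()`, as the tests above do. A small sketch of that pattern (the `classify` helper and the crate path are assumptions):

```rust
use std::error::Error as _;

use parser::command::label::ParseError;
use parser::error::Error;
use parser::token::ErrorKind;

fn classify(err: &Error<'_>) -> &'static str {
    if err.source().and_then(|s| s.downcast_ref::<ErrorKind>()).is_some() {
        "tokenizer error"
    } else if err.source().and_then(|s| s.downcast_ref::<ParseError>()).is_some() {
        "label-command error"
    } else {
        "other error"
    }
}
```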