From 924b10b0cd6c09fa7afa58e6f7a068646d1e56af Mon Sep 17 00:00:00 2001 From: Joris Date: Mon, 14 Oct 2024 22:30:47 +0200 Subject: Use nom to parse lines --- src/deck.rs | 73 ++++++---------- src/main.rs | 6 +- src/parser.rs | 206 ++++++++++++++++++++++++++++++++++++++++++++++ src/sync.rs | 10 +-- src/util/serialization.rs | 5 +- 5 files changed, 242 insertions(+), 58 deletions(-) create mode 100644 src/parser.rs (limited to 'src') diff --git a/src/deck.rs b/src/deck.rs index 4491d9b..3fff77f 100644 --- a/src/deck.rs +++ b/src/deck.rs @@ -1,4 +1,5 @@ -use crate::{model::Line, util::serialization}; +use crate::model::Line; +use crate::parser; use anyhow::{Error, Result}; use std::fmt; use std::fs::File; @@ -7,13 +8,17 @@ use std::path::Path; #[derive(Debug, Clone)] struct ParseError { - line: usize, - message: String, + line: String, + line_number: usize, } impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Line {}: {}", self.line, self.message) + write!( + f, + "Error parsing line {}:\n\n{}", + self.line_number, self.line + ) } } @@ -40,39 +45,12 @@ pub fn read_file(path: &str) -> Result> { } fn read_line(index: usize, line: &str) -> Result> { - let line = line.trim(); - - if line.starts_with('#') || line.is_empty() { - Ok(None) - } else if !line.starts_with('-') { - Err(Error::from(ParseError { - line: index + 1, - message: "an entry should starts with “-”.".to_string(), - })) - } else { - let without_minus = line.split('-').skip(1).collect::>().join("-"); - let without_comment = without_minus.split('#').collect::>()[0].trim(); - let translation = without_comment.split(':').collect::>(); - if translation.len() != 2 { - Err(Error::from(ParseError { - line: index + 1, - message: "an entry should contain two parts separated by “:”.".to_string(), - })) - } else { - let t1 = translation[0].trim(); - let t2 = translation[1].trim(); - if t1.is_empty() || t2.is_empty() { - Err(Error::from(ParseError { - line: index + 1, - message: "an entry should contain two parts separated by “:”.".to_string(), - })) - } else { - Ok(Some(Line { - part_1: serialization::line_to_words(t1), - part_2: serialization::line_to_words(t2), - })) - } - } + match parser::parse_line(line) { + Ok(("", line)) => Ok(line), + _ => Err(Error::from(ParseError { + line_number: index + 1, + line: line.to_string(), + })), } } @@ -98,12 +76,11 @@ pub mod tests { #[test] fn errors() { - is_error("A : a"); - is_error("- A"); - is_error("- A -> a"); - is_error("- A : B : C"); - is_error("- : "); - is_error("- A : a\n-") + is_error("A"); + is_error("A -> a"); + is_error("A : B : C"); + is_error(":"); + is_error("A : a\n-") } #[test] @@ -117,19 +94,19 @@ pub mod tests { #[test] fn card() { - check("- A : a", &[(&["A"], &["a"])]); + check("A : a", &[(&["A"], &["a"])]); } #[test] fn cards() { - check("- A : a\n- B : b", &[(&["A"], &["a"]), (&["B"], &["b"])]); + check("A : a\nB : b", &[(&["A"], &["a"]), (&["B"], &["b"])]); } #[test] fn alternatives() { - check("- A : a1 | a2", &[(&["A"], &["a1", "a2"])]); - check("- A1 | A2 : a", &[(&["A1", "A2"], &["a"])]); - check("- A1 | A2 : a1 | a2", &[(&["A1", "A2"], &["a1", "a2"])]); + check("A : a1 | a2", &[(&["A"], &["a1", "a2"])]); + check("A1 | A2 : a", &[(&["A1", "A2"], &["a"])]); + check("A1 | A2 : a1 | a2", &[(&["A1", "A2"], &["a1", "a2"])]); } fn is_error(content: &str) { diff --git a/src/main.rs b/src/main.rs index ebc2d7e..3454897 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod db; mod deck; mod gui; mod model; +mod parser; mod space_repetition; mod sync; mod util; @@ -47,13 +48,10 @@ fn main() -> Result<()> { gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)? } } - }, - Err(msg) => { - gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)? } + Err(msg) => gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)?, } - gui::restore_terminal(&mut term) } diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..62df908 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,206 @@ +use crate::model::Line; +use nom::{ + bytes::complete::{is_not, take, take_until}, + character::complete::{char, space0}, + combinator::{opt, peek}, + error::{Error, ErrorKind}, + multi::separated_list1, + sequence::delimited, + IResult, +}; + +// Temporary API +// Only expose parse_next ATM, so that tracking line number is done outside of the parser. + +pub fn parse_line(s: &str) -> IResult<&str, Option> { + let (s, _) = space0(s)?; + let (s, line) = opt(parse_parts)(s)?; + let (s, _) = parse_end_of_line(s)?; + Ok((s, line)) +} + +fn parse_parts(s: &str) -> IResult<&str, Line> { + let (s, part_1) = parse_options(s)?; + let (s, _) = sep(':')(s)?; + let (s, part_2) = parse_options(s)?; + Ok((s, Line { part_1, part_2 })) +} + +// Rest + +fn parse_options(s: &str) -> IResult<&str, Vec> { + Ok(separated_list1(sep('|'), parse_term)(s)?) +} + +pub fn parse_term(s: &str) -> IResult<&str, String> { + let mut term = String::from(""); + let mut s = s; + + loop { + match peek(take::(1 as usize))(s) { + Ok((_, c)) => { + if c == "[" { + let (s1, cs) = take_until("]")(s)?; + s = s1; + term.push_str(cs); + } else if c == "(" { + let (s1, cs) = take_until(")")(s)?; + s = s1; + term.push_str(cs); + } else if c == ":" || c == "|" || c == "#" || c == "\n" || c == "\r" { + break; + } else { + let (s1, cs) = is_not("[(:|#\n\r")(s)?; + s = s1; + term.push_str(cs); + } + } + _ => break, + } + } + + if term.is_empty() { + Err(nom::Err::Error(Error { + input: s, + code: ErrorKind::Fail, + })) + } else { + Ok((s, term.trim().to_string())) + } +} + +fn parse_end_of_line(s: &str) -> IResult<&str, ()> { + let (s, _) = space0(s)?; + let (s, _) = opt(parse_comment)(s)?; + Ok((s, ())) +} + +fn parse_comment(s: &str) -> IResult<&str, ()> { + let (s, _) = char('#')(s)?; + let (s, _) = is_not("\n\r")(s)?; + Ok((s, ())) +} + +// Helpers + +pub fn sep(c: char) -> impl FnMut(&str) -> IResult<&str, ()> { + move |s: &str| { + let (s, _) = delimited(space0, char(c), space0)(s)?; + Ok((s, ())) + } +} + +// Tests + +#[cfg(test)] +mod tests { + + use super::*; + use nom::{character::complete::newline, multi::many0}; + + fn parse(s: &str) -> IResult<&str, Vec> { + let (s, lines) = many0(parse_next)(s)?; + let (s, _) = many0(parse_empty_line)(s)?; + Ok((s, lines)) + } + + fn parse_next(s: &str) -> IResult<&str, Line> { + let (s, _) = many0(parse_empty_line)(s)?; + let (s, _) = space0(s)?; + let (s, part_1) = parse_options(s)?; + let (s, _) = sep(':')(s)?; + let (s, part_2) = parse_options(s)?; + let (s, _) = parse_end_of_line(s)?; + let (s, _) = opt(newline)(s)?; + Ok((s, Line { part_1, part_2 })) + } + + fn parse_empty_line(s: &str) -> IResult<&str, ()> { + let (s, _) = parse_end_of_line(s)?; + let (s, _) = newline(s)?; + Ok((s, ())) + } + + #[test] + fn simple() { + assert_eq!(parse("foo : bar"), lines(vec!((vec!("foo"), vec!("bar"))))) + } + + #[test] + fn spaces() { + assert_eq!( + parse(" foo : bar "), + lines(vec!((vec!("foo"), vec!("bar")))) + ) + } + + #[test] + fn comments() { + assert_eq!( + parse("foo : bar # This is a comment"), + lines(vec!((vec!("foo"), vec!("bar")))) + ) + } + + #[test] + fn options() { + assert_eq!( + parse("foo | bar | baz : A | B | C"), + lines(vec!((vec!("foo", "bar", "baz"), vec!("A", "B", "C")))) + ) + } + + #[test] + fn term_with_spaces() { + assert_eq!( + parse("foo bar : baz baz"), + lines(vec!((vec!("foo bar"), vec!("baz baz")))) + ) + } + + #[test] + fn paren() { + assert_eq!( + parse("foo (|:[) : bar [::|)]"), + lines(vec!((vec!("foo (|:[)"), vec!("bar [::|)]")))) + ) + } + + #[test] + fn empty_lines() { + assert_eq!(parse("\n \n \n# Hello\n # Test\n\n\n"), lines(vec!())) + } + + #[test] + fn multi_lines() { + assert_eq!( + parse("foo : FOO\nbar : BAR\nbaz : BAZ"), + lines(vec!( + (vec!("foo"), vec!("FOO")), + (vec!("bar"), vec!("BAR")), + (vec!("baz"), vec!("BAZ")) + )) + ) + } + + // Helpers + + static EMPTY_STRING: &str = ""; + + fn lines(lines: Vec<(Vec<&str>, Vec<&str>)>) -> IResult<&'static str, Vec> { + Ok(( + EMPTY_STRING, + lines + .into_iter() + .map(|line| to_line(line.0, line.1)) + .collect(), + )) + } + + fn to_line(part_1: Vec<&str>, part_2: Vec<&str>) -> Line { + Line { + part_1: part_1.into_iter().map(|s| s.to_string()).collect(), + part_2: part_2.into_iter().map(|s| s.to_string()).collect(), + } + } +} diff --git a/src/sync.rs b/src/sync.rs index 6e3d84b..974e838 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -113,7 +113,7 @@ mod tests { #[test] fn test_added() { - let diff = deck_diff("- A : a", "- A : a\n- B : b"); + let diff = deck_diff("A : a", "A : a\nB : b"); has_questions(diff.new, vec![("B", vec!["b"]), ("b", vec!["B"])]); assert!(diff.deleted.is_empty()); @@ -122,7 +122,7 @@ mod tests { #[test] fn test_updated() { - let diff = deck_diff("- A : a1", "- A : a2"); + let diff = deck_diff("A : a1", "A : a2"); has_questions(diff.new, vec![("A", vec!["a2"]), ("a2", vec!["A"])]); has_questions(diff.deleted, vec![("A", vec!["a1"]), ("a1", vec!["A"])]); @@ -131,7 +131,7 @@ mod tests { #[test] fn test_deleted() { - let diff = deck_diff("- A : a", ""); + let diff = deck_diff("A : a", ""); assert!(diff.new.is_empty()); has_questions(diff.deleted, vec![("A", vec!["a"]), ("a", vec!["A"])]); @@ -146,7 +146,7 @@ mod tests { deleted: Some(0), }]; - let diff = super::diff(db_entries, deck::tests::read_string("- A : a").unwrap()); + let diff = super::diff(db_entries, deck::tests::read_string("A : a").unwrap()); has_questions(diff.new, vec![("a", vec!["A"])]); assert!(diff.deleted.is_empty()); @@ -154,7 +154,7 @@ mod tests { } #[test] fn regroup_same_question() { - let diff = deck_diff("", "- A : a\n- A | B : b"); + let diff = deck_diff("", "A : a\nA | B : b"); has_questions( diff.new, diff --git a/src/util/serialization.rs b/src/util/serialization.rs index 189a41a..a8b5438 100644 --- a/src/util/serialization.rs +++ b/src/util/serialization.rs @@ -23,6 +23,9 @@ mod tests { #[test] fn test_words_to_line() { assert_eq!(words_to_line(&["a".to_string()]), "a"); - assert_eq!(words_to_line(&["a".to_string(), "b".to_string(), "c".to_string()]), "a | b | c"); + assert_eq!( + words_to_line(&["a".to_string(), "b".to_string(), "c".to_string()]), + "a | b | c" + ); } } -- cgit v1.2.3