diff options
author | Joris | 2024-10-14 22:30:47 +0200 |
---|---|---|
committer | Joris | 2024-10-14 22:30:47 +0200 |
commit | 924b10b0cd6c09fa7afa58e6f7a068646d1e56af (patch) | |
tree | 3f20b5db131e54ceb73f6e2b932525e9fe6f2c63 | |
parent | a126fba82cb4f9f4467b130fca7fd5a269fe3a5e (diff) |
Use nom to parse lines (main)
-rw-r--r-- | Cargo.lock | 23 | ||||
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | README.md | 14 | ||||
-rw-r--r-- | src/deck.rs | 73 | ||||
-rw-r--r-- | src/main.rs | 6 | ||||
-rw-r--r-- | src/parser.rs | 206 | ||||
-rw-r--r-- | src/sync.rs | 10 | ||||
-rw-r--r-- | src/util/serialization.rs | 5 |
8 files changed, 275 insertions, 63 deletions
@@ -251,6 +251,7 @@ dependencies = [ "chrono", "clap", "crossterm", + "nom", "ratatui", "rusqlite", "rusqlite_migration", @@ -378,6 +379,18 @@ dependencies = [ ] [[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] name = "mio" version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -390,6 +403,16 @@ dependencies = [ ] [[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] name = "num-traits" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -9,6 +9,7 @@ anyhow = "1.0" chrono = "0.4" clap = { version = "4.4", features = ["derive"] } crossterm = { version = "0.27" } +nom = { version = "7.1", features = ["alloc"] } rusqlite = { version = "0.29", features = [ "chrono" ] } rusqlite_migration = "1.0" serde = { version = "1.0", features = ["derive"] } @@ -20,13 +20,17 @@ Cards are created from a plain text `./deck` file: ``` # This is a comment -- good moorning : bonjour -- alternative 1 | alternative 2 : choix 1 | choix 2 -- cat (indication) : chat [ʃa] +good moorning : bonjour +hi : salut # Another comment +alternative 1 | alternative 2 : choix 1 | choix 2 +cat (indication) : chat [ʃa] ``` # Backlog +- migrate to FSRS https://github.com/open-spaced-repetition/rs-fsrs - Fix crashes on zoom / changing vertical size -- Show a message when the deck can’t be read -- Get a rough idea of card learning state +- Get a rough idea of card learning state: + - 
number of cards + - amount of cards coming in in the following days + - show presentation page? diff --git a/src/deck.rs b/src/deck.rs index 4491d9b..3fff77f 100644 --- a/src/deck.rs +++ b/src/deck.rs @@ -1,4 +1,5 @@ -use crate::{model::Line, util::serialization}; +use crate::model::Line; +use crate::parser; use anyhow::{Error, Result}; use std::fmt; use std::fs::File; @@ -7,13 +8,17 @@ use std::path::Path; #[derive(Debug, Clone)] struct ParseError { - line: usize, - message: String, + line: String, + line_number: usize, } impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Line {}: {}", self.line, self.message) + write!( + f, + "Error parsing line {}:\n\n{}", + self.line_number, self.line + ) } } @@ -40,39 +45,12 @@ pub fn read_file(path: &str) -> Result<Vec<Line>> { } fn read_line(index: usize, line: &str) -> Result<Option<Line>> { - let line = line.trim(); - - if line.starts_with('#') || line.is_empty() { - Ok(None) - } else if !line.starts_with('-') { - Err(Error::from(ParseError { - line: index + 1, - message: "an entry should starts with “-”.".to_string(), - })) - } else { - let without_minus = line.split('-').skip(1).collect::<Vec<&str>>().join("-"); - let without_comment = without_minus.split('#').collect::<Vec<&str>>()[0].trim(); - let translation = without_comment.split(':').collect::<Vec<&str>>(); - if translation.len() != 2 { - Err(Error::from(ParseError { - line: index + 1, - message: "an entry should contain two parts separated by “:”.".to_string(), - })) - } else { - let t1 = translation[0].trim(); - let t2 = translation[1].trim(); - if t1.is_empty() || t2.is_empty() { - Err(Error::from(ParseError { - line: index + 1, - message: "an entry should contain two parts separated by “:”.".to_string(), - })) - } else { - Ok(Some(Line { - part_1: serialization::line_to_words(t1), - part_2: serialization::line_to_words(t2), - })) - } - } + match parser::parse_line(line) { + Ok(("", line)) => Ok(line), + _ 
=> Err(Error::from(ParseError { + line_number: index + 1, + line: line.to_string(), + })), } } @@ -98,12 +76,11 @@ pub mod tests { #[test] fn errors() { - is_error("A : a"); - is_error("- A"); - is_error("- A -> a"); - is_error("- A : B : C"); - is_error("- : "); - is_error("- A : a\n-") + is_error("A"); + is_error("A -> a"); + is_error("A : B : C"); + is_error(":"); + is_error("A : a\n-") } #[test] @@ -117,19 +94,19 @@ pub mod tests { #[test] fn card() { - check("- A : a", &[(&["A"], &["a"])]); + check("A : a", &[(&["A"], &["a"])]); } #[test] fn cards() { - check("- A : a\n- B : b", &[(&["A"], &["a"]), (&["B"], &["b"])]); + check("A : a\nB : b", &[(&["A"], &["a"]), (&["B"], &["b"])]); } #[test] fn alternatives() { - check("- A : a1 | a2", &[(&["A"], &["a1", "a2"])]); - check("- A1 | A2 : a", &[(&["A1", "A2"], &["a"])]); - check("- A1 | A2 : a1 | a2", &[(&["A1", "A2"], &["a1", "a2"])]); + check("A : a1 | a2", &[(&["A"], &["a1", "a2"])]); + check("A1 | A2 : a", &[(&["A1", "A2"], &["a"])]); + check("A1 | A2 : a1 | a2", &[(&["A1", "A2"], &["a1", "a2"])]); } fn is_error(content: &str) { diff --git a/src/main.rs b/src/main.rs index ebc2d7e..3454897 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod db; mod deck; mod gui; mod model; +mod parser; mod space_repetition; mod sync; mod util; @@ -47,13 +48,10 @@ fn main() -> Result<()> { gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)? } } - }, - Err(msg) => { - gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)? 
} + Err(msg) => gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)?, } - gui::restore_terminal(&mut term) } diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..62df908 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,206 @@ +use crate::model::Line; +use nom::{ + bytes::complete::{is_not, take, take_until}, + character::complete::{char, space0}, + combinator::{opt, peek}, + error::{Error, ErrorKind}, + multi::separated_list1, + sequence::delimited, + IResult, +}; + +// Temporary API +// Only expose parse_next ATM, so that tracking line number is done outside of the parser. + +pub fn parse_line(s: &str) -> IResult<&str, Option<Line>> { + let (s, _) = space0(s)?; + let (s, line) = opt(parse_parts)(s)?; + let (s, _) = parse_end_of_line(s)?; + Ok((s, line)) +} + +fn parse_parts(s: &str) -> IResult<&str, Line> { + let (s, part_1) = parse_options(s)?; + let (s, _) = sep(':')(s)?; + let (s, part_2) = parse_options(s)?; + Ok((s, Line { part_1, part_2 })) +} + +// Rest + +fn parse_options(s: &str) -> IResult<&str, Vec<String>> { + Ok(separated_list1(sep('|'), parse_term)(s)?) 
+} + +pub fn parse_term(s: &str) -> IResult<&str, String> { + let mut term = String::from(""); + let mut s = s; + + loop { + match peek(take::<usize, &str, ()>(1 as usize))(s) { + Ok((_, c)) => { + if c == "[" { + let (s1, cs) = take_until("]")(s)?; + s = s1; + term.push_str(cs); + } else if c == "(" { + let (s1, cs) = take_until(")")(s)?; + s = s1; + term.push_str(cs); + } else if c == ":" || c == "|" || c == "#" || c == "\n" || c == "\r" { + break; + } else { + let (s1, cs) = is_not("[(:|#\n\r")(s)?; + s = s1; + term.push_str(cs); + } + } + _ => break, + } + } + + if term.is_empty() { + Err(nom::Err::Error(Error { + input: s, + code: ErrorKind::Fail, + })) + } else { + Ok((s, term.trim().to_string())) + } +} + +fn parse_end_of_line(s: &str) -> IResult<&str, ()> { + let (s, _) = space0(s)?; + let (s, _) = opt(parse_comment)(s)?; + Ok((s, ())) +} + +fn parse_comment(s: &str) -> IResult<&str, ()> { + let (s, _) = char('#')(s)?; + let (s, _) = is_not("\n\r")(s)?; + Ok((s, ())) +} + +// Helpers + +pub fn sep(c: char) -> impl FnMut(&str) -> IResult<&str, ()> { + move |s: &str| { + let (s, _) = delimited(space0, char(c), space0)(s)?; + Ok((s, ())) + } +} + +// Tests + +#[cfg(test)] +mod tests { + + use super::*; + use nom::{character::complete::newline, multi::many0}; + + fn parse(s: &str) -> IResult<&str, Vec<Line>> { + let (s, lines) = many0(parse_next)(s)?; + let (s, _) = many0(parse_empty_line)(s)?; + Ok((s, lines)) + } + + fn parse_next(s: &str) -> IResult<&str, Line> { + let (s, _) = many0(parse_empty_line)(s)?; + let (s, _) = space0(s)?; + let (s, part_1) = parse_options(s)?; + let (s, _) = sep(':')(s)?; + let (s, part_2) = parse_options(s)?; + let (s, _) = parse_end_of_line(s)?; + let (s, _) = opt(newline)(s)?; + Ok((s, Line { part_1, part_2 })) + } + + fn parse_empty_line(s: &str) -> IResult<&str, ()> { + let (s, _) = parse_end_of_line(s)?; + let (s, _) = newline(s)?; + Ok((s, ())) + } + + #[test] + fn simple() { + assert_eq!(parse("foo : bar"), 
lines(vec!((vec!("foo"), vec!("bar"))))) + } + + #[test] + fn spaces() { + assert_eq!( + parse(" foo : bar "), + lines(vec!((vec!("foo"), vec!("bar")))) + ) + } + + #[test] + fn comments() { + assert_eq!( + parse("foo : bar # This is a comment"), + lines(vec!((vec!("foo"), vec!("bar")))) + ) + } + + #[test] + fn options() { + assert_eq!( + parse("foo | bar | baz : A | B | C"), + lines(vec!((vec!("foo", "bar", "baz"), vec!("A", "B", "C")))) + ) + } + + #[test] + fn term_with_spaces() { + assert_eq!( + parse("foo bar : baz baz"), + lines(vec!((vec!("foo bar"), vec!("baz baz")))) + ) + } + + #[test] + fn paren() { + assert_eq!( + parse("foo (|:[) : bar [::|)]"), + lines(vec!((vec!("foo (|:[)"), vec!("bar [::|)]")))) + ) + } + + #[test] + fn empty_lines() { + assert_eq!(parse("\n \n \n# Hello\n # Test\n\n\n"), lines(vec!())) + } + + #[test] + fn multi_lines() { + assert_eq!( + parse("foo : FOO\nbar : BAR\nbaz : BAZ"), + lines(vec!( + (vec!("foo"), vec!("FOO")), + (vec!("bar"), vec!("BAR")), + (vec!("baz"), vec!("BAZ")) + )) + ) + } + + // Helpers + + static EMPTY_STRING: &str = ""; + + fn lines(lines: Vec<(Vec<&str>, Vec<&str>)>) -> IResult<&'static str, Vec<Line>> { + Ok(( + EMPTY_STRING, + lines + .into_iter() + .map(|line| to_line(line.0, line.1)) + .collect(), + )) + } + + fn to_line(part_1: Vec<&str>, part_2: Vec<&str>) -> Line { + Line { + part_1: part_1.into_iter().map(|s| s.to_string()).collect(), + part_2: part_2.into_iter().map(|s| s.to_string()).collect(), + } + } +} diff --git a/src/sync.rs b/src/sync.rs index 6e3d84b..974e838 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -113,7 +113,7 @@ mod tests { #[test] fn test_added() { - let diff = deck_diff("- A : a", "- A : a\n- B : b"); + let diff = deck_diff("A : a", "A : a\nB : b"); has_questions(diff.new, vec![("B", vec!["b"]), ("b", vec!["B"])]); assert!(diff.deleted.is_empty()); @@ -122,7 +122,7 @@ mod tests { #[test] fn test_updated() { - let diff = deck_diff("- A : a1", "- A : a2"); + let diff = 
deck_diff("A : a1", "A : a2"); has_questions(diff.new, vec![("A", vec!["a2"]), ("a2", vec!["A"])]); has_questions(diff.deleted, vec![("A", vec!["a1"]), ("a1", vec!["A"])]); @@ -131,7 +131,7 @@ mod tests { #[test] fn test_deleted() { - let diff = deck_diff("- A : a", ""); + let diff = deck_diff("A : a", ""); assert!(diff.new.is_empty()); has_questions(diff.deleted, vec![("A", vec!["a"]), ("a", vec!["A"])]); @@ -146,7 +146,7 @@ mod tests { deleted: Some(0), }]; - let diff = super::diff(db_entries, deck::tests::read_string("- A : a").unwrap()); + let diff = super::diff(db_entries, deck::tests::read_string("A : a").unwrap()); has_questions(diff.new, vec![("a", vec!["A"])]); assert!(diff.deleted.is_empty()); @@ -154,7 +154,7 @@ mod tests { } #[test] fn regroup_same_question() { - let diff = deck_diff("", "- A : a\n- A | B : b"); + let diff = deck_diff("", "A : a\nA | B : b"); has_questions( diff.new, diff --git a/src/util/serialization.rs b/src/util/serialization.rs index 189a41a..a8b5438 100644 --- a/src/util/serialization.rs +++ b/src/util/serialization.rs @@ -23,6 +23,9 @@ mod tests { #[test] fn test_words_to_line() { assert_eq!(words_to_line(&["a".to_string()]), "a"); - assert_eq!(words_to_line(&["a".to_string(), "b".to_string(), "c".to_string()]), "a | b | c"); + assert_eq!( + words_to_line(&["a".to_string(), "b".to_string(), "c".to_string()]), + "a | b | c" + ); } } |