aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Joris, 2024-10-14 22:30:47 +0200
committer: Joris, 2024-10-14 22:30:47 +0200
commit: 924b10b0cd6c09fa7afa58e6f7a068646d1e56af (patch)
tree: 3f20b5db131e54ceb73f6e2b932525e9fe6f2c63 /src
parent: a126fba82cb4f9f4467b130fca7fd5a269fe3a5e (diff)
Use nom to parse lines (branch: main)
Diffstat (limited to 'src')
-rw-r--r-- src/deck.rs 73
-rw-r--r-- src/main.rs 6
-rw-r--r-- src/parser.rs 206
-rw-r--r-- src/sync.rs 10
-rw-r--r-- src/util/serialization.rs 5
5 files changed, 242 insertions, 58 deletions
diff --git a/src/deck.rs b/src/deck.rs
index 4491d9b..3fff77f 100644
--- a/src/deck.rs
+++ b/src/deck.rs
@@ -1,4 +1,5 @@
-use crate::{model::Line, util::serialization};
+use crate::model::Line;
+use crate::parser;
use anyhow::{Error, Result};
use std::fmt;
use std::fs::File;
@@ -7,13 +8,17 @@ use std::path::Path;
#[derive(Debug, Clone)]
struct ParseError {
- line: usize,
- message: String,
+ line: String,
+ line_number: usize,
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(f, "Line {}: {}", self.line, self.message)
+ write!(
+ f,
+ "Error parsing line {}:\n\n{}",
+ self.line_number, self.line
+ )
}
}
@@ -40,39 +45,12 @@ pub fn read_file(path: &str) -> Result<Vec<Line>> {
}
fn read_line(index: usize, line: &str) -> Result<Option<Line>> {
- let line = line.trim();
-
- if line.starts_with('#') || line.is_empty() {
- Ok(None)
- } else if !line.starts_with('-') {
- Err(Error::from(ParseError {
- line: index + 1,
- message: "an entry should starts with “-”.".to_string(),
- }))
- } else {
- let without_minus = line.split('-').skip(1).collect::<Vec<&str>>().join("-");
- let without_comment = without_minus.split('#').collect::<Vec<&str>>()[0].trim();
- let translation = without_comment.split(':').collect::<Vec<&str>>();
- if translation.len() != 2 {
- Err(Error::from(ParseError {
- line: index + 1,
- message: "an entry should contain two parts separated by “:”.".to_string(),
- }))
- } else {
- let t1 = translation[0].trim();
- let t2 = translation[1].trim();
- if t1.is_empty() || t2.is_empty() {
- Err(Error::from(ParseError {
- line: index + 1,
- message: "an entry should contain two parts separated by “:”.".to_string(),
- }))
- } else {
- Ok(Some(Line {
- part_1: serialization::line_to_words(t1),
- part_2: serialization::line_to_words(t2),
- }))
- }
- }
+ match parser::parse_line(line) {
+ Ok(("", line)) => Ok(line),
+ _ => Err(Error::from(ParseError {
+ line_number: index + 1,
+ line: line.to_string(),
+ })),
}
}
@@ -98,12 +76,11 @@ pub mod tests {
#[test]
fn errors() {
- is_error("A : a");
- is_error("- A");
- is_error("- A -> a");
- is_error("- A : B : C");
- is_error("- : ");
- is_error("- A : a\n-")
+ is_error("A");
+ is_error("A -> a");
+ is_error("A : B : C");
+ is_error(":");
+ is_error("A : a\n-")
}
#[test]
@@ -117,19 +94,19 @@ pub mod tests {
#[test]
fn card() {
- check("- A : a", &[(&["A"], &["a"])]);
+ check("A : a", &[(&["A"], &["a"])]);
}
#[test]
fn cards() {
- check("- A : a\n- B : b", &[(&["A"], &["a"]), (&["B"], &["b"])]);
+ check("A : a\nB : b", &[(&["A"], &["a"]), (&["B"], &["b"])]);
}
#[test]
fn alternatives() {
- check("- A : a1 | a2", &[(&["A"], &["a1", "a2"])]);
- check("- A1 | A2 : a", &[(&["A1", "A2"], &["a"])]);
- check("- A1 | A2 : a1 | a2", &[(&["A1", "A2"], &["a1", "a2"])]);
+ check("A : a1 | a2", &[(&["A"], &["a1", "a2"])]);
+ check("A1 | A2 : a", &[(&["A1", "A2"], &["a"])]);
+ check("A1 | A2 : a1 | a2", &[(&["A1", "A2"], &["a1", "a2"])]);
}
fn is_error(content: &str) {
diff --git a/src/main.rs b/src/main.rs
index ebc2d7e..3454897 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,6 +2,7 @@ mod db;
mod deck;
mod gui;
mod model;
+mod parser;
mod space_repetition;
mod sync;
mod util;
@@ -47,13 +48,10 @@ fn main() -> Result<()> {
gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)?
}
}
- },
- Err(msg) => {
- gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)?
}
+ Err(msg) => gui::message::show(&mut term, &deck_name, &format!("{msg}"), true)?,
}
-
gui::restore_terminal(&mut term)
}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..62df908
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,206 @@
+use crate::model::Line;
+use nom::{
+ bytes::complete::{is_not, take, take_until},
+ character::complete::{char, space0},
+ combinator::{opt, peek},
+ error::{Error, ErrorKind},
+ multi::separated_list1,
+ sequence::delimited,
+ IResult,
+};
+
+// Temporary API
+// Only single-line parsing (parse_line) is exposed for now, so that line numbers are tracked
+// outside of the parser.
+
+/// Parses a single deck line.
+///
+/// Leading blanks are skipped, then an optional `part_1 : part_2` entry is read, followed by
+/// optional trailing blanks and an optional `#` comment.
+///
+/// The entry is `None` when `parse_parts` does not match at the current position; in that case
+/// the unparsed text stays in the remaining input, which is returned as the first tuple element.
+pub fn parse_line(s: &str) -> IResult<&str, Option<Line>> {
+    let (rest, _) = space0(s)?;
+    let (rest, entry) = opt(parse_parts)(rest)?;
+    parse_end_of_line(rest).map(|(rest, ())| (rest, entry))
+}
+
+/// Parses an entry made of two option lists separated by `:` (blanks around `:` are allowed).
+fn parse_parts(s: &str) -> IResult<&str, Line> {
+    let (rest, part_1) = parse_options(s)?;
+    let (rest, ()) = sep(':')(rest)?;
+    parse_options(rest).map(|(rest, part_2)| (rest, Line { part_1, part_2 }))
+}
+
+// Rest
+
+/// Parses one or more terms separated by `|` (blanks around `|` are allowed).
+///
+/// Fails when not even a first term can be read.
+fn parse_options(s: &str) -> IResult<&str, Vec<String>> {
+    // The combinator already yields the function's return type: no `Ok(..?)` re-wrapping needed.
+    separated_list1(sep('|'), parse_term)(s)
+}
+
+/// Parses a single term and returns it with surrounding whitespace trimmed.
+///
+/// A term runs until `:`, `|`, `#`, a line break or the end of the input. Text starting at `[`
+/// or `(` is taken verbatim up to the next `]` or `)` respectively, so separators appearing
+/// inside brackets or parentheses belong to the term.
+///
+/// # Errors
+///
+/// - Propagates the `take_until` error when an opening `[` or `(` has no closing counterpart.
+/// - Returns `ErrorKind::Fail` when the term is empty once trimmed.
+pub fn parse_term(s: &str) -> IResult<&str, String> {
+    let mut term = String::new();
+    let mut rest = s;
+
+    // Look at the next character without consuming it; the loop ends at the end of the input.
+    while let Ok((_, c)) = peek(take::<usize, &str, ()>(1usize))(rest) {
+        let chunk: IResult<&str, &str> = match c {
+            // The closing delimiter is left in place: it is picked up by the generic branch
+            // during the next iteration.
+            "[" => take_until("]")(rest),
+            "(" => take_until(")")(rest),
+            ":" | "|" | "#" | "\n" | "\r" => break,
+            _ => is_not("[(:|#\n\r")(rest),
+        };
+        let (next, text) = chunk?;
+        rest = next;
+        term.push_str(text);
+    }
+
+    // Check emptiness after trimming, so that a whitespace-only term is rejected instead of
+    // being returned as an empty string.
+    let term = term.trim();
+    if term.is_empty() {
+        Err(nom::Err::Error(Error {
+            input: rest,
+            code: ErrorKind::Fail,
+        }))
+    } else {
+        Ok((rest, term.to_string()))
+    }
+}
+
+/// Consumes optional trailing blanks followed by an optional `#` comment.
+fn parse_end_of_line(s: &str) -> IResult<&str, ()> {
+    let (rest, _) = space0(s)?;
+    opt(parse_comment)(rest).map(|(rest, _)| (rest, ()))
+}
+
+/// Consumes a `#` comment running up to, but not including, the end of the line.
+///
+/// The comment text may be empty: a lone `#` is accepted. `is_not` on its own rejects an empty
+/// match, hence the `opt` wrapper around it.
+fn parse_comment(s: &str) -> IResult<&str, ()> {
+    let (s, _) = char('#')(s)?;
+    let (s, _) = opt(is_not("\n\r"))(s)?;
+    Ok((s, ()))
+}
+
+// Helpers
+
+/// Builds a parser matching the separator `c`, optionally surrounded by spaces or tabs.
+pub fn sep(c: char) -> impl FnMut(&str) -> IResult<&str, ()> {
+    move |s: &str| delimited(space0, char(c), space0)(s).map(|(rest, _)| (rest, ()))
+}
+
+// Tests
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use nom::{character::complete::newline, multi::many0};
+
+    /// Remaining input expected once a whole document has been consumed.
+    static EMPTY_STRING: &str = "";
+
+    // Multi-line driver built on top of the single-line parsers.
+
+    /// Parses every entry of a document, then any trailing blank or comment-only lines.
+    fn parse(s: &str) -> IResult<&str, Vec<Line>> {
+        let (rest, entries) = many0(parse_next)(s)?;
+        let (rest, _) = many0(parse_empty_line)(rest)?;
+        Ok((rest, entries))
+    }
+
+    /// Skips blank or comment-only lines, then parses one entry and its optional line break.
+    fn parse_next(s: &str) -> IResult<&str, Line> {
+        let (rest, _) = many0(parse_empty_line)(s)?;
+        let (rest, _) = space0(rest)?;
+        let (rest, entry) = parse_parts(rest)?;
+        let (rest, _) = parse_end_of_line(rest)?;
+        let (rest, _) = opt(newline)(rest)?;
+        Ok((rest, entry))
+    }
+
+    /// Consumes a line holding nothing but blanks and/or a comment, line break included.
+    fn parse_empty_line(s: &str) -> IResult<&str, ()> {
+        let (rest, _) = parse_end_of_line(s)?;
+        newline(rest).map(|(rest, _)| (rest, ()))
+    }
+
+    #[test]
+    fn simple() {
+        let expected = lines(vec![(vec!["foo"], vec!["bar"])]);
+        assert_eq!(parse("foo : bar"), expected)
+    }
+
+    #[test]
+    fn spaces() {
+        let expected = lines(vec![(vec!["foo"], vec!["bar"])]);
+        assert_eq!(parse(" foo : bar "), expected)
+    }
+
+    #[test]
+    fn comments() {
+        let expected = lines(vec![(vec!["foo"], vec!["bar"])]);
+        assert_eq!(parse("foo : bar # This is a comment"), expected)
+    }
+
+    #[test]
+    fn options() {
+        let expected = lines(vec![(vec!["foo", "bar", "baz"], vec!["A", "B", "C"])]);
+        assert_eq!(parse("foo | bar | baz : A | B | C"), expected)
+    }
+
+    #[test]
+    fn term_with_spaces() {
+        let expected = lines(vec![(vec!["foo bar"], vec!["baz baz"])]);
+        assert_eq!(parse("foo bar : baz baz"), expected)
+    }
+
+    #[test]
+    fn paren() {
+        let expected = lines(vec![(vec!["foo (|:[)"], vec!["bar [::|)]"])]);
+        assert_eq!(parse("foo (|:[) : bar [::|)]"), expected)
+    }
+
+    #[test]
+    fn empty_lines() {
+        let expected = lines(vec![]);
+        assert_eq!(parse("\n \n \n# Hello\n # Test\n\n\n"), expected)
+    }
+
+    #[test]
+    fn multi_lines() {
+        let expected = lines(vec![
+            (vec!["foo"], vec!["FOO"]),
+            (vec!["bar"], vec!["BAR"]),
+            (vec!["baz"], vec!["BAZ"]),
+        ]);
+        assert_eq!(parse("foo : FOO\nbar : BAR\nbaz : BAZ"), expected)
+    }
+
+    // Helpers
+
+    /// Builds the result expected from `parse`: the given entries with no input left over.
+    fn lines(lines: Vec<(Vec<&str>, Vec<&str>)>) -> IResult<&'static str, Vec<Line>> {
+        let expected = lines
+            .into_iter()
+            .map(|(part_1, part_2)| to_line(part_1, part_2))
+            .collect();
+        Ok((EMPTY_STRING, expected))
+    }
+
+    /// Builds a `Line` out of borrowed words.
+    fn to_line(part_1: Vec<&str>, part_2: Vec<&str>) -> Line {
+        fn owned(words: Vec<&str>) -> Vec<String> {
+            words.into_iter().map(String::from).collect()
+        }
+
+        Line {
+            part_1: owned(part_1),
+            part_2: owned(part_2),
+        }
+    }
+}
diff --git a/src/sync.rs b/src/sync.rs
index 6e3d84b..974e838 100644
--- a/src/sync.rs
+++ b/src/sync.rs
@@ -113,7 +113,7 @@ mod tests {
#[test]
fn test_added() {
- let diff = deck_diff("- A : a", "- A : a\n- B : b");
+ let diff = deck_diff("A : a", "A : a\nB : b");
has_questions(diff.new, vec![("B", vec!["b"]), ("b", vec!["B"])]);
assert!(diff.deleted.is_empty());
@@ -122,7 +122,7 @@ mod tests {
#[test]
fn test_updated() {
- let diff = deck_diff("- A : a1", "- A : a2");
+ let diff = deck_diff("A : a1", "A : a2");
has_questions(diff.new, vec![("A", vec!["a2"]), ("a2", vec!["A"])]);
has_questions(diff.deleted, vec![("A", vec!["a1"]), ("a1", vec!["A"])]);
@@ -131,7 +131,7 @@ mod tests {
#[test]
fn test_deleted() {
- let diff = deck_diff("- A : a", "");
+ let diff = deck_diff("A : a", "");
assert!(diff.new.is_empty());
has_questions(diff.deleted, vec![("A", vec!["a"]), ("a", vec!["A"])]);
@@ -146,7 +146,7 @@ mod tests {
deleted: Some(0),
}];
- let diff = super::diff(db_entries, deck::tests::read_string("- A : a").unwrap());
+ let diff = super::diff(db_entries, deck::tests::read_string("A : a").unwrap());
has_questions(diff.new, vec![("a", vec!["A"])]);
assert!(diff.deleted.is_empty());
@@ -154,7 +154,7 @@ mod tests {
}
#[test]
fn regroup_same_question() {
- let diff = deck_diff("", "- A : a\n- A | B : b");
+ let diff = deck_diff("", "A : a\nA | B : b");
has_questions(
diff.new,
diff --git a/src/util/serialization.rs b/src/util/serialization.rs
index 189a41a..a8b5438 100644
--- a/src/util/serialization.rs
+++ b/src/util/serialization.rs
@@ -23,6 +23,9 @@ mod tests {
#[test]
fn test_words_to_line() {
assert_eq!(words_to_line(&["a".to_string()]), "a");
- assert_eq!(words_to_line(&["a".to_string(), "b".to_string(), "c".to_string()]), "a | b | c");
+ assert_eq!(
+ words_to_line(&["a".to_string(), "b".to_string(), "c".to_string()]),
+ "a | b | c"
+ );
}
}