From 924b10b0cd6c09fa7afa58e6f7a068646d1e56af Mon Sep 17 00:00:00 2001 From: Joris Date: Mon, 14 Oct 2024 22:30:47 +0200 Subject: Use nom to parse lines --- src/parser.rs | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 src/parser.rs (limited to 'src/parser.rs') diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..62df908 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,206 @@ +use crate::model::Line; +use nom::{ + bytes::complete::{is_not, take, take_until}, + character::complete::{char, space0}, + combinator::{opt, peek}, + error::{Error, ErrorKind}, + multi::separated_list1, + sequence::delimited, + IResult, +}; + +// Temporary API +// Only expose parse_next ATM, so that tracking line number is done outside of the parser. + +pub fn parse_line(s: &str) -> IResult<&str, Option> { + let (s, _) = space0(s)?; + let (s, line) = opt(parse_parts)(s)?; + let (s, _) = parse_end_of_line(s)?; + Ok((s, line)) +} + +fn parse_parts(s: &str) -> IResult<&str, Line> { + let (s, part_1) = parse_options(s)?; + let (s, _) = sep(':')(s)?; + let (s, part_2) = parse_options(s)?; + Ok((s, Line { part_1, part_2 })) +} + +// Rest + +fn parse_options(s: &str) -> IResult<&str, Vec> { + Ok(separated_list1(sep('|'), parse_term)(s)?) +} + +pub fn parse_term(s: &str) -> IResult<&str, String> { + let mut term = String::from(""); + let mut s = s; + + loop { + match peek(take::(1 as usize))(s) { + Ok((_, c)) => { + if c == "[" { + let (s1, cs) = take_until("]")(s)?; + s = s1; + term.push_str(cs); + } else if c == "(" { + let (s1, cs) = take_until(")")(s)?; + s = s1; + term.push_str(cs); + } else if c == ":" || c == "|" || c == "#" || c == "\n" || c == "\r" { + break; + } else { + let (s1, cs) = is_not("[(:|#\n\r")(s)?; + s = s1; + term.push_str(cs); + } + } + _ => break, + } + } + + if term.is_empty() { + Err(nom::Err::Error(Error { + input: s, + code: ErrorKind::Fail, + })) + } else { + Ok((s, term.trim().to_string())) + } +} + +fn parse_end_of_line(s: &str) -> IResult<&str, ()> { + let (s, _) = space0(s)?; + let (s, _) = opt(parse_comment)(s)?; + Ok((s, ())) +} + +fn parse_comment(s: &str) -> IResult<&str, ()> { + let (s, _) = char('#')(s)?; + let (s, _) = is_not("\n\r")(s)?; + Ok((s, ())) +} + +// Helpers + +pub fn sep(c: char) -> impl FnMut(&str) -> IResult<&str, ()> { + move |s: &str| { + let (s, _) = delimited(space0, char(c), space0)(s)?; + Ok((s, ())) + } +} + +// Tests + +#[cfg(test)] +mod tests { + + use super::*; + use nom::{character::complete::newline, multi::many0}; + + fn parse(s: &str) -> IResult<&str, Vec> { + let (s, lines) = many0(parse_next)(s)?; + let (s, _) = many0(parse_empty_line)(s)?; + Ok((s, lines)) + } + + fn parse_next(s: &str) -> IResult<&str, Line> { + let (s, _) = many0(parse_empty_line)(s)?; + let (s, _) = space0(s)?; + let (s, part_1) = parse_options(s)?; + let (s, _) = sep(':')(s)?; + let (s, part_2) = parse_options(s)?; + let (s, _) = parse_end_of_line(s)?; + let (s, _) = opt(newline)(s)?; + Ok((s, Line { part_1, part_2 })) + } + + fn parse_empty_line(s: &str) -> IResult<&str, ()> { + let (s, _) = parse_end_of_line(s)?; + let (s, _) = newline(s)?; + Ok((s, ())) + } + + #[test] + fn simple() { + assert_eq!(parse("foo : bar"), lines(vec!((vec!("foo"), vec!("bar"))))) + } + + #[test] + fn spaces() { + assert_eq!( + parse(" foo : bar "), + lines(vec!((vec!("foo"), vec!("bar")))) + ) + } + + #[test] + fn comments() { + assert_eq!( + parse("foo : bar # This is a comment"), + lines(vec!((vec!("foo"), vec!("bar")))) + ) + } + + #[test] + fn options() { + assert_eq!( + parse("foo | bar | baz : A | B | C"), + lines(vec!((vec!("foo", "bar", "baz"), vec!("A", "B", "C")))) + ) + } + + #[test] + fn term_with_spaces() { + assert_eq!( + parse("foo bar : baz baz"), + lines(vec!((vec!("foo bar"), vec!("baz baz")))) + ) + } + + #[test] + fn paren() { + assert_eq!( + parse("foo (|:[) : bar [::|)]"), + lines(vec!((vec!("foo (|:[)"), vec!("bar [::|)]")))) + ) + } + + #[test] + fn empty_lines() { + assert_eq!(parse("\n \n \n# Hello\n # Test\n\n\n"), lines(vec!())) + } + + #[test] + fn multi_lines() { + assert_eq!( + parse("foo : FOO\nbar : BAR\nbaz : BAZ"), + lines(vec!( + (vec!("foo"), vec!("FOO")), + (vec!("bar"), vec!("BAR")), + (vec!("baz"), vec!("BAZ")) + )) + ) + } + + // Helpers + + static EMPTY_STRING: &str = ""; + + fn lines(lines: Vec<(Vec<&str>, Vec<&str>)>) -> IResult<&'static str, Vec> { + Ok(( + EMPTY_STRING, + lines + .into_iter() + .map(|line| to_line(line.0, line.1)) + .collect(), + )) + } + + fn to_line(part_1: Vec<&str>, part_2: Vec<&str>) -> Line { + Line { + part_1: part_1.into_iter().map(|s| s.to_string()).collect(), + part_2: part_2.into_iter().map(|s| s.to_string()).collect(), + } + } +} -- cgit v1.2.3