seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam
Log | Files | Refs | README | LICENSE

commit 3566ca3f0bdfc2c0684253c013b5d0953c898a36
parent 6782df906b1cb2c6dbc29fba56e5e2b84ae65145
Author: Demonstrandum <moi@knutsen.co>
Date:   Mon, 22 Jun 2020 01:55:18 +0100

Make lexer and parser more whitespace aware.

Diffstat:
M src/assemble/html.rs | 35 ++++++++++++++++++++++++-----------
M src/bin.rs | 5 +++--
M src/lib.rs | 2 ++
M src/parse/lexer.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M src/parse/parser.rs | 25 +++++++++++++++++++++++--
M src/parse/tokens.rs | 1 +
M test.html | 28 ++++++++--------------------
M test.sex | 3 ++-
8 files changed, 117 insertions(+), 45 deletions(-)

diff --git a/src/assemble/html.rs b/src/assemble/html.rs @@ -16,7 +16,7 @@ impl HTMLFormatter { } pub const DEFAULT : &str = - "<!DOCTYPE>\n\ + "<!DOCTYPE html>\n\ <html>\n\ <head></head>\n\ <body></body>\n\ @@ -43,7 +43,7 @@ impl Documentise for HTMLFormatter { if has_declaration { current_node = &self.tree[1]; } else { - doc += "<!DOCTYPE html>" + doc += "<!DOCTYPE html>\n" } // Check if <html></html> root object exists. // Check if head exits, if not, make an empty one. @@ -69,11 +69,21 @@ impl Documentise for HTMLFormatter { /// Converting the tree to an HTML string. impl Display for HTMLFormatter { fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { - for node in &self.tree { + let mut tree_iter = self.tree.iter().peekable(); + while let Some(node) = tree_iter.next() { match node { - ParseNode::Symbol(node) => write!(f, " {}", node.value)?, - ParseNode::Number(node) => write!(f, " {}", node.value)?, - ParseNode::String(node) => write!(f, " {}", node.value)?, + ParseNode::Symbol(node) + | ParseNode::Number(node) => { + // If symbol ahead is so-called "symbolic", we can + // infere there was a space between them. + write!(f, "{}", node.value)?; + if let Some(peek) = tree_iter.peek() { + if peek.symbolic().is_some() { + write!(f, " ")? + } + } + }, + ParseNode::String(node) => write!(f, "{}", node.value)?, ParseNode::List(list) => { let head = list.first(); let mut tag = ""; @@ -92,9 +102,12 @@ impl Display for HTMLFormatter { // Declarations behave differently. if tag.as_bytes()[0] == '!' as u8 { - // TODO: Following can only be symbols. while !rest.is_empty() { - write!(f, " {}", rest[0])?; + if let Some(node) = rest[0].symbolic() { + write!(f, " {}", node.value)?; + } else { + // TODO: Make and send error (can only be symbolic). + } rest = &rest[1..]; } write!(f, ">")?; @@ -109,13 +122,13 @@ impl Display for HTMLFormatter { // Error! Cannot be non atomic. 
} } - writeln!(f, ">")?; + write!(f, ">")?; let html_fmt = HTMLFormatter::new(rest.to_owned()); - writeln!(f, "{}", html_fmt)?; + write!(f, "{}", html_fmt)?; write!(f, "</{}>", tag)?; }, - _ => write!(f, "hi")?, + _ => panic!("Uh {:?}", node), } } write!(f, "") diff --git a/src/bin.rs b/src/bin.rs @@ -50,9 +50,10 @@ fn main() -> Result<(), Box<dyn Error>> { for file in files { let tree = seam::parse_file(&file)?; - /*eprintln!("{}", &tree + #[cfg(feature="debug")] + eprintln!("{}", &tree .iter().fold(String::new(), - |acc, s| acc + "\n" + &s.to_string()));*/ + |acc, s| acc + "\n" + &s.to_string())); if target == "html" { let fmt = seam::assemble::html::HTMLFormatter::new(tree); let result = fmt.document(); diff --git a/src/lib.rs b/src/lib.rs @@ -11,6 +11,8 @@ pub const VERSION : (u8, u8, u8) = (0, 1, 0); pub fn parse<P: AsRef<Path>>(string : String, source : Option<P>) -> Result<parser::ParseTree, Box<dyn Error>> { let tokens = lexer::lex(string, source)?; + #[cfg(feature="debug")] + eprintln!("{:#?}", &tokens); let tree = parser::parse_stream(tokens)?; Ok(tree) } diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs @@ -15,6 +15,10 @@ impl fmt::Display for LexError { impl Error for LexError { } +fn is_whitespace(character : char) -> bool { + ['\n', '\r', '\t', ' '].contains(&character) +} + fn character_kind(character : char, prev : Option<tokens::Kind>) -> Option<tokens::Kind> { let kind = match character { @@ -47,7 +51,7 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) let mut line_bytes : usize = 0; let mut accumulator : Vec<u8> = Vec::new(); - let mut tokens = Vec::new(); + let mut tokens : TokenStream = Vec::new(); let mut token_start : usize = 0; let mut current_kind = None; @@ -65,33 +69,74 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) let character = current_byte as char; + if character == ';' { // EON Comment + let mut i = 0; + while string.as_bytes()[bytes + i] != '\n' as u8 { + i += 1; + } + bytes += i; + 
continue; + } + let mut prev_kind = current_kind; current_kind = character_kind(character, current_kind); let string_start = character == '"' - && prev_kind != Some(tokens::Kind::String); + && prev_kind != Some(tokens::Kind::String); if string_start { current_kind = None; } - let mut peek_kind = if bytes == eof - 1 { + let peek_char = if bytes == eof - 1 { None } else { let peek_char = string.as_bytes()[bytes + 1] as char; - character_kind(peek_char, current_kind) + Some(peek_char) }; + let mut peek_kind = if let Some(peeked) = peek_char { + character_kind(peeked, current_kind) + } else { None }; + + let some_lparen = Some(tokens::Kind::LParen); + let some_rparen = Some(tokens::Kind::RParen); + + let was_lparen = current_kind == some_lparen; + let was_rparen = current_kind == some_rparen; + + let peek_string = peek_char == Some('"'); + let peek_lparen = peek_kind == some_lparen; + let peek_rparen = peek_kind == some_rparen; - let was_lparen = current_kind == Some(tokens::Kind::LParen); - let was_rparen = current_kind == Some(tokens::Kind::RParen); - let peek_rparen = peek_kind == Some(tokens::Kind::RParen); if was_lparen || was_rparen { peek_kind = None; prev_kind = None; - } - if peek_rparen { + } else if peek_rparen || peek_lparen { + peek_kind = None; + } else if peek_string { peek_kind = None; } + // If we're on a whitespace, and there's a bracket (or quote) ahead, + // we need to explicitly say there's whitespace between the + // last token and the next bracket/quotation. 
+ // (Ignore the whitespace, if it is consecutive to another whitespace) + match tokens.last() { + Some(token) if token.kind != tokens::Kind::Whitespace + && token.kind != tokens::Kind::Keyword + && is_whitespace(character) + && (peek_rparen + || peek_lparen + || peek_char == Some('"') + || token.kind == tokens::Kind::String + || token.kind == tokens::Kind::RParen) => { + let kind = tokens::Kind::Whitespace; + let site = tokens::Site::from_line(lines, line_bytes, 1); + let value = character.to_string(); + tokens.push(Token::new(kind, value, site)); + }, + Some(_) | None => (), + } + if let Some(kind_current) = current_kind { if prev_kind.is_none() { old_kind = current_kind; diff --git a/src/parse/parser.rs b/src/parse/parser.rs @@ -32,6 +32,13 @@ pub enum ParseNode { } impl ParseNode { + pub fn symbolic(&self) -> Option<Node> { + match self { + Self::Symbol(node) + | Self::Number(node) => Some(node.to_owned()), + _ => None + } + } pub fn atomic(&self) -> Option<Node> { match self { Self::Symbol(node) @@ -62,6 +69,7 @@ fn parse_atomic(token : &Token) -> Result<ParseNode, ParseError> { Kind::Symbol => Ok(ParseNode::Symbol(node)), Kind::String => Ok(ParseNode::String(node)), Kind::Number => Ok(ParseNode::Number(node)), + Kind::Whitespace => Ok(ParseNode::String(node)), _ => Err(ParseError( String::from("Atomic token not found here."), token.site.clone())) @@ -96,11 +104,17 @@ pub fn parse(tokens : &[Token]) }, Kind::Keyword => { // Parse second token, make attribute. - let (node, slice) = parse(&tokens[1..])?; + let (node, mut slice) = parse(&tokens[1..])?; let attribute = AttributeNode { keyword: token.value[1..].to_owned(), node: Box::new(node) }; + // White space after attributes don't count. 
+ if let Some(next) = slice.first() { + if next.kind == Kind::Whitespace { + slice = &slice[1..]; + } + } Ok((ParseNode::Attribute(attribute), slice)) }, Kind::RParen => { @@ -127,12 +141,19 @@ pub fn parse_stream(tokens: tokens::TokenStream) } /// Pretty printing for parse nodes. +#[cfg(feature="debug")] impl fmt::Display for ParseNode { fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { match self { ParseNode::Symbol(node) | ParseNode::Number(node) => write!(f, "{}", &node.value), - ParseNode::String(node) => write!(f, "\"{}\"", &node.value), + ParseNode::String(node) => { + if node.value.trim().is_empty() { + write!(f, "") + } else { + write!(f, "\"{}\"", &node.value) + } + }, ParseNode::Attribute(attr) => write!(f, ":{} {}", &attr.keyword, &*attr.node), ParseNode::List(list) => write!(f, "({}{})", &list[0], diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs @@ -36,6 +36,7 @@ pub enum Kind { String, Number, Keyword, + Whitespace, } #[derive(Debug, Clone)] diff --git a/test.html b/test.html @@ -1,21 +1,9 @@ -<!DOCTYPE html><html> -<head> -<title> - Example HTML Document -</title> -</head><body> -<p id="hello"> - Hello, World! -</p><p> - something something text... -</p><h1> - A (big) Header! 
-</h1><p> - Yet some more<span style="color: red"> - text -</span> <3 -</p><img alt="Cute Cat" src="https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" width="300"> +<!DOCTYPE html> +<html> <head> <title>Example HTML Document</title></head> +<body> <p id="hello">Hello, World!</p> +<p>something something text...</p> +<h1> A (big) Header!</h1> +<p>Yet some more <span style="color: red">text</span> <3</p> +<p>Hello<span style="color: green">World</span>!</p> +<img alt="Cat" src="https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" width="300"></img></body></html> -</img> -</body> -</html> diff --git a/test.sex b/test.sex @@ -8,8 +8,9 @@ (h1 "A (big) Header!") (p Yet some more (span :style "color: red" text) <3) + (p Hello(span :style "color: green" World)!) (img - :alt "Cute Cat" + :alt Cat :src "https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" :width 300)))