seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam
Log | Files | Refs | README | LICENSE

commit 7a08dc915e546affdd3679d8011253819c8c29ba
parent 9f192ee4eca807893380b5e2a39ab4de60321045
Author: Demonstrandum <samuel@knutsen.co>
Date:   Fri, 28 Jun 2024 19:55:02 +0100

bug: html void tags would stop any further generation.

Because the generator used `return Ok(())` instead of `continue` after
finishing early for an HTML void element, the enclosing `generate` loop
returned at the first such tag and the nodes following it were never
emitted.  This was fixed with a refactor: generating an individual node
now lives in its own function (`generate_html_node`, and likewise
`generate_xml_node`), and all the old `continue`s were converted into
`return`s, which now end only the current node.

Diffstat:
M Cargo.lock | 2 +-
M Cargo.toml | 4 ++--
M src/assemble/html.rs | 340 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M src/assemble/xml.rs | 207 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M src/lib.rs | 2 +-
5 files changed, 281 insertions(+), 274 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -162,7 +162,7 @@ dependencies = [ [[package]] name = "seam" -version = "0.2.1" +version = "0.2.2" dependencies = [ "chrono", "colored", diff --git a/Cargo.toml b/Cargo.toml @@ -4,12 +4,12 @@ description = "Symbolic Expressions As Markup." keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"] license-file = "LICENSE" homepage = "https://git.knutsen.co/seam" -version = "0.2.1" +version = "0.2.2" authors = ["Demonstrandum <samuel@knutsen.co>"] edition = "2021" [features] -default = ["debug"] +# default = ["debug"] debug = [] [lib] diff --git a/src/assemble/html.rs b/src/assemble/html.rs @@ -98,189 +98,193 @@ impl<'a> MarkupDisplay for HTMLFormatter<'a> { Ok(doc) } - fn generate(&self, f : Formatter) - -> Result<(), GenerationError> { + fn generate(&self, f: Formatter) -> Result<(), GenerationError> { let mut tree_iter = self.tree.iter().peekable(); while let Some(node) = tree_iter.next() { - match node { - ParseNode::Symbol(node) - | ParseNode::Number(node) => { - write!(f, "{}", node.leading_whitespace)?; - write!(f, "{}", escape_xml(&node.value))?; - }, - ParseNode::String(node) => { - write!(f, "{}", node.leading_whitespace)?; - write!(f, "{}", escape_xml(&node.value))?; - }, - ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => { - write!(f, "{}", leading_whitespace)?; - let head = list.first(); - let tag: &str; // html <tag> name. - if let Some(head_node) = head { - if let ParseNode::Symbol(head_symbol) = head_node { - tag = &head_symbol.value; - write!(f, "<{}", tag)?; - } else { - // Error, tags can only have symbol values. - return Err(GenerationError::new("HTML", - "HTML tags can only be given as symbols.", - head_node.site())); - } - } else { - // Error, empty tags not supported. 
- return Err(GenerationError::new("HTML", - "Empty lists cannot be converted into a valid HTML tag.", - node.site())); - } - let tag = tag.to_ascii_lowercase(); + generate_html_node(f, node)?; + } + Ok(()) + } +} - let mut rest = &list[1..]; +fn generate_html_node<'a>(f: Formatter, node: &'a ParseNode<'a>) -> Result<(), GenerationError<'a>> { + match node { + ParseNode::Symbol(node) + | ParseNode::Number(node) => { + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))?; + }, + ParseNode::String(node) => { + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))?; + }, + ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => { + write!(f, "{}", leading_whitespace)?; + let head = list.first(); + let tag: &str; // html <tag> name. + if let Some(head_node) = head { + if let ParseNode::Symbol(head_symbol) = head_node { + tag = &head_symbol.value; + write!(f, "<{}", tag)?; + } else { + // Error, tags can only have symbol values. + return Err(GenerationError::new("HTML", + "HTML tags can only be given as symbols.", + head_node.site())); + } + } else { + // Error, empty tags not supported. + return Err(GenerationError::new("HTML", + "Empty lists cannot be converted into a valid HTML tag.", + node.site())); + } + let tag = tag.to_ascii_lowercase(); - // Declarations behave differently. - if tag.as_bytes()[0] == '!' as u8 { - while !rest.is_empty() { - if let Some(node) = rest[0].symbolic() { - write!(f, " {}", node.value)?; - } else { - return Err(GenerationError::new("HTML", - "Non-symbolic item in declaration", - &rest[0].site())); - } - rest = &rest[1..]; - } - write!(f, ">")?; - continue; - } + let mut rest = &list[1..]; - while let Some(ParseNode::Attribute { node, keyword, .. }) = rest.first() { - if let Some(atom) = (*node).atomic() { - write!(f, " {}=\"{}\"", keyword, atom.value)?; - rest = &rest[1..]; - } else { - // Error! Cannot be non atomic. 
- return Err(GenerationError::new("HTML", - "Attribute cannot contain non-atomic data.", - &(*node).site())); - } + // Declarations behave differently. + if tag.as_bytes()[0] == '!' as u8 { + while !rest.is_empty() { + if let Some(node) = rest[0].symbolic() { + write!(f, " {}", node.value)?; + } else { + return Err(GenerationError::new("HTML", + "Non-symbolic item in declaration", + &rest[0].site())); } - write!(f, ">")?; + rest = &rest[1..]; + } + write!(f, ">")?; + return Ok(()); + } - // Check early if this tag is a void element. - if VOID_ELEMENTS.binary_search(&tag.as_str()).is_ok() { - // Void elements cannot have children. - if let Some(child_node) = rest.first() { - return Err(GenerationError::new("HTML", - &format!("A void element such as `<{}>' cannot have children.", tag), - child_node.site())); - } - // Finished: void elements dont get a closing tag. - return Ok(()); - } + while let Some(ParseNode::Attribute { node, keyword, .. }) = rest.first() { + if let Some(atom) = (*node).atomic() { + write!(f, " {}=\"{}\"", keyword, atom.value)?; + rest = &rest[1..]; + } else { + // Error! Cannot be non atomic. + return Err(GenerationError::new("HTML", + "Attribute cannot contain non-atomic data.", + &(*node).site())); + } + } + write!(f, ">")?; - // The first node to a tag should have its whitespace supressed! - // e.g. `(p hello world)` -> `<p>hello world</p>`. - // But if there's a new line, its likely it should be carreid through. - // e.g. - // ``` - // (div - // hello) - // ``` - // -> - // ``` - // <div> - // hello - // </div> - let rest_with_preserved_whitespace = rest; - let mut rest: Vec<ParseNode<'a>> = rest_with_preserved_whitespace.to_vec(); - let mut is_first_node_on_next_line = false; - if let Some(first_node) = rest.get_mut(0) { - is_first_node_on_next_line = first_node.leading_whitespace().contains('\n'); - if !is_first_node_on_next_line { - first_node.set_leading_whitespace("".to_owned()); - } - } + // Check early if this tag is a void element. 
+ if VOID_ELEMENTS.binary_search(&tag.as_str()).is_ok() { + // Void elements cannot have children. + if let Some(child_node) = rest.first() { + return Err(GenerationError::new("HTML", + &format!("A void element such as `<{}>' cannot have children.", tag), + child_node.site())); + } + // Finished: void elements don't get a closing tag. + return Ok(()); + } - // Handle tags which *do not* contain HTML as syntax: - // <pre>, <style>, <script>, <math>, <svg>, <textarea>, <title> - // Specifically: - // - <svg> and <math> contain XML, not HTML; - // - <pre>, <textarea> and <title> contain raw text, not parsed as HTML; - // - <pre> will display raw text found in source code; - // - <textarea> and <title> however, are escapable (evaluete macros); - // - <script> contains JavaScript, maybe we will parse this in the future!; - // - <style> contains CSS, which we have our own parser for already. - match tag.as_str() { - "pre" => { // <pre> should preserve the raw text in the *source* file. - // Find beginning and end byte offset of first and last token inside - // of `(pre ...)` and simply clone the text between those offsets. - let pre = raw_text(rest_with_preserved_whitespace.first(), end_token); - write!(f, "{}", pre)?; - }, - "textarea" | "title" => { // Not eaw source-code, but plain-text. - // We have to reconsititute what the source-code would look like if all - // macros were expanded by hand, and read as raw source code. - let sexp_fmt = SExpFormatter::new(rest.into_boxed_slice()); - let sexp_fmt = Box::leak(Box::new(sexp_fmt)); // TODO: Store. - sexp_fmt.generate(f)?; - }, - "style" => { // <style> tag needs to generate CSS. - // When just a string is passed, don't convert. Assume raw CSS. - if let Some(ParseNode::String(string_node)) = rest.first() { - if rest.len() != 1 { - // FIXME: Leak doesn't really matter, but should really be a better way. 
- let second_node = Box::leak(Box::new(rest[1].to_owned())); - return Err(GenerationError::new("HTML+CSS", - "A `style' tag can either have S-expression CSS rules, or\ - a single string containing raw CSS be passed in.\n\ - A string was passed in, but excess expressions were passed \ - in after that!", - second_node.site())); - } - // Otherwise, write that raw CSS. - write!(f, "{}", string_node.value)?; - } else { - writeln!(f, "")?; - let css_fmt = CSSFormatter::new(rest.into_boxed_slice()); - let css_fmt = Box::leak(Box::new(css_fmt)); // FIXME: store formatter. - css_fmt.generate(f)?; - } - }, - "script" => { - // TODO: Generating JavaScript from S-expressions is not implemented. - // For now, just treat it as a raw-text tag (a la <pre>). - let sexp_fmt = SExpFormatter::new(rest.into_boxed_slice()); - let sexp_fmt = Box::leak(Box::new(sexp_fmt)); // TODO: Store. - sexp_fmt.generate(f)?; - }, - "math" | "svg" => { // <math> and <svg> are subsets of XML. - let xml_fmt = XMLFormatter::new(rest.into_boxed_slice()); - let xml_fmt = Box::leak(Box::new(xml_fmt)); // FIXME: store formatter. - xml_fmt.generate(f)?; - }, - _ => { // Tag contains regular old HTML. - let html_fmt = HTMLFormatter::new(rest.into_boxed_slice()); - let html_fmt = Box::leak(Box::new(html_fmt)); // FIXME: store formatter. - html_fmt.generate(f)?; - }, - } - // Closing tag should be equally as spaced as opening tag (?) - if end_token.leading_whitespace.is_empty() { - if is_first_node_on_next_line || tag == "style" { - write!(f, "{}", leading_whitespace)?; + // The first node to a tag should have its whitespace supressed! + // e.g. `(p hello world)` -> `<p>hello world</p>`. + // But if there's a new line, its likely it should be carreid through. + // e.g. 
+ // ``` + // (div + // hello) + // ``` + // -> + // ``` + // <div> + // hello + // </div> + let rest_with_preserved_whitespace = rest; + let mut rest: Vec<ParseNode<'a>> = rest_with_preserved_whitespace.to_vec(); + let mut is_first_node_on_next_line = false; + if let Some(first_node) = rest.get_mut(0) { + is_first_node_on_next_line = first_node.leading_whitespace().contains('\n'); + if !is_first_node_on_next_line { + first_node.set_leading_whitespace("".to_owned()); + } + } + + // Handle tags which *do not* contain HTML as syntax: + // <pre>, <style>, <script>, <math>, <svg>, <textarea>, <title> + // Specifically: + // - <svg> and <math> contain XML, not HTML; + // - <pre>, <textarea> and <title> contain raw text, not parsed as HTML; + // - <pre> will display raw text found in source code; + // - <textarea> and <title> however, are escapable (evaluete macros); + // - <script> contains JavaScript, maybe we will parse this in the future!; + // - <style> contains CSS, which we have our own parser for already. + match tag.as_str() { + "pre" => { // <pre> should preserve the raw text in the *source* file. + // Find beginning and end byte offset of first and last token inside + // of `(pre ...)` and simply clone the text between those offsets. + let pre = raw_text(rest_with_preserved_whitespace.first(), end_token); + write!(f, "{}", pre)?; + }, + "textarea" | "title" => { // Not eaw source-code, but plain-text. + // We have to reconsititute what the source-code would look like if all + // macros were expanded by hand, and read as raw source code. + let sexp_fmt = SExpFormatter::new(rest.into_boxed_slice()); + let sexp_fmt = Box::leak(Box::new(sexp_fmt)); // TODO: Store. + sexp_fmt.generate(f)?; + }, + "style" => { // <style> tag needs to generate CSS. + // When just a string is passed, don't convert. Assume raw CSS. 
+ if let Some(ParseNode::String(string_node)) = rest.first() { + if rest.len() != 1 { + // FIXME: Leak doesn't really matter, but should really be a better way. + let second_node = Box::leak(Box::new(rest[1].to_owned())); + return Err(GenerationError::new("HTML+CSS", + "A `style' tag can either have S-expression CSS rules, or\ + a single string containing raw CSS be passed in.\n\ + A string was passed in, but excess expressions were passed \ + in after that!", + second_node.site())); } + // Otherwise, write that raw CSS. + write!(f, "{}", string_node.value)?; } else { - write!(f, "{}", end_token.leading_whitespace)?; + writeln!(f, "")?; + let css_fmt = CSSFormatter::new(rest.into_boxed_slice()); + let css_fmt = Box::leak(Box::new(css_fmt)); // FIXME: store formatter. + css_fmt.generate(f)?; } - - write!(f, "</{}>", tag)?; }, - ParseNode::Attribute { ref site, .. } => - return Err(GenerationError::new("HTML", - "Unexpected attribute encountered.", site)) + "script" => { + // TODO: Generating JavaScript from S-expressions is not implemented. + // For now, just treat it as a raw-text tag (a la <pre>). + let sexp_fmt = SExpFormatter::new(rest.into_boxed_slice()); + let sexp_fmt = Box::leak(Box::new(sexp_fmt)); // TODO: Store. + sexp_fmt.generate(f)?; + }, + "math" | "svg" => { // <math> and <svg> are subsets of XML. + let xml_fmt = XMLFormatter::new(rest.into_boxed_slice()); + let xml_fmt = Box::leak(Box::new(xml_fmt)); // FIXME: store formatter. + xml_fmt.generate(f)?; + }, + _ => { // Tag contains regular old HTML. + let html_fmt = HTMLFormatter::new(rest.into_boxed_slice()); + let html_fmt = Box::leak(Box::new(html_fmt)); // FIXME: store formatter. + html_fmt.generate(f)?; + }, } - } - Ok(()) + // Closing tag should be equally as spaced as opening tag (?) 
+ if end_token.leading_whitespace.is_empty() { + if is_first_node_on_next_line || tag == "style" { + write!(f, "{}", leading_whitespace)?; + } + } else { + write!(f, "{}", end_token.leading_whitespace)?; + } + + write!(f, "</{}>", tag)?; + }, + ParseNode::Attribute { ref site, .. } => + return Err(GenerationError::new("HTML", + "Unexpected attribute encountered.", site)) } + Ok(()) } /// Get raw text in source-file between a `start_node` and some `end_token`. diff --git a/src/assemble/xml.rs b/src/assemble/xml.rs @@ -11,21 +11,6 @@ impl<'a> XMLFormatter<'a> { pub fn new(tree: ParseTree<'a>) -> Self { Self { tree } } - - fn display_attribute(&'a self, attr: &'a parser::ParseNode<'a>) - -> Result<String, GenerationError> { - let parser::ParseNode::Attribute { keyword, node, .. } = attr else { - panic!("Passed non-attribute to display_attribute.") - }; - if let Some(symbol) = (*node).atomic() { - Ok(format!("{}=\"{}\"", keyword, symbol.value)) - } else { - Err(GenerationError::new("XML", - "Attribute can only contain symbols, numbers or strings", - &(*node).site())) - } - } - } pub const DEFAULT : &str = @@ -61,100 +46,118 @@ impl<'a> MarkupDisplay for XMLFormatter<'a> { fn generate(&self, f : Formatter) -> Result<(), GenerationError> { let mut tree_iter = self.tree.iter().peekable(); while let Some(node) = tree_iter.next() { - match node { - ParseNode::Symbol(node) - | ParseNode::Number(node) => { - write!(f, "{}", node.leading_whitespace)?; - write!(f, "{}", escape_xml(&node.value))?; - }, - ParseNode::String(node) => { - write!(f, "{}", node.leading_whitespace)?; - write!(f, "{}", escape_xml(&node.value))? - }, - ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => { - write!(f, "{}", leading_whitespace)?; - let head = list.first(); - let tag: &str; // xml <tag> name. 
- if let Some(head_node) = head { - if let ParseNode::Symbol(head_symbol) = head_node { - tag = &head_symbol.value; - write!(f, "<{}", tag)?; - } else { - // Error, tags can only have symbol values. - return Err(GenerationError::new("XML", - "XML tags can only be given as symbols.", - head_node.site())); - } - } else { - // Error, empty tags not supported. - return Err(GenerationError::new("XML", - "Empty lists cannot be converted into a valid XML tag.", - node.site())); - } + generate_xml_node(f, node)?; + } + Ok(()) + } +} - let mut rest = &list[1..]; - - // Declarations behave differently. - let front = tag.as_bytes()[0] as char; - if front == '!' || front == '?' { - while !rest.is_empty() { - if let Some(node) = rest[0].symbolic() { - write!(f, "{}", node.value)?; - } else if let attr@ParseNode::Attribute { .. } = &rest[0] { - write!(f, " {}", self.display_attribute(attr)?)?; - } else { - return Err(GenerationError::new("XML", - "Only identifiers and attributes are allowed in declarations.", - &rest[0].site())); - } - rest = &rest[1..]; - } - if front == '?' { - write!(f, " ?>")?; - } else { - write!(f, ">")?; - } - continue; - } +fn generate_xml_node<'a>(f: Formatter, node: &'a ParseNode<'a>) -> Result<(), GenerationError<'a>> { + match node { + ParseNode::Symbol(node) + | ParseNode::Number(node) => { + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))?; + }, + ParseNode::String(node) => { + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))? + }, + ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => { + write!(f, "{}", leading_whitespace)?; + let head = list.first(); + let tag: &str; // xml <tag> name. + if let Some(head_node) = head { + if let ParseNode::Symbol(head_symbol) = head_node { + tag = &head_symbol.value; + write!(f, "<{}", tag)?; + } else { + // Error, tags can only have symbol values. 
+ return Err(GenerationError::new("XML", + "XML tags can only be given as symbols.", + head_node.site())); + } + } else { + // Error, empty tags not supported. + return Err(GenerationError::new("XML", + "Empty lists cannot be converted into a valid XML tag.", + node.site())); + } - while let Some(attr@ParseNode::Attribute { .. }) = rest.first() { - write!(f, " {}", self.display_attribute(&attr)?)?; - rest = &rest[1..]; - } - write!(f, ">")?; + let mut rest = &list[1..]; - // See similar comment for HTML generation: - // We strip leading whitespace from the first child element in a tag. - // This is more natural w.r.t. the S-exp syntax. - let mut rest = rest.to_vec(); - let mut is_first_node_on_next_line = false; - if let Some(first_node) = rest.get_mut(0) { - is_first_node_on_next_line = first_node.leading_whitespace().contains('\n'); - if !is_first_node_on_next_line { - first_node.set_leading_whitespace("".to_owned()); - } + // Declarations behave differently. + let front = tag.as_bytes()[0] as char; + if front == '!' || front == '?' { + while !rest.is_empty() { + if let Some(node) = rest[0].symbolic() { + write!(f, "{}", node.value)?; + } else if let attr@ParseNode::Attribute { .. } = &rest[0] { + write!(f, " {}", display_attribute(attr)?)?; + } else { + return Err(GenerationError::new("XML", + "Only identifiers and attributes are allowed in declarations.", + &rest[0].site())); } + rest = &rest[1..]; + } + if front == '?' { + write!(f, " ?>")?; + } else { + write!(f, ">")?; + } + return Ok(()); + } - let xml_fmt = XMLFormatter::new(rest.to_owned().into_boxed_slice()); - let xml_fmt = Box::leak(Box::new(xml_fmt)); // FIXME: store formatter. - xml_fmt.generate(f)?; + while let Some(attr@ParseNode::Attribute { .. }) = rest.first() { + write!(f, " {}", display_attribute(&attr)?)?; + rest = &rest[1..]; + } + write!(f, ">")?; + + // See similar comment for HTML generation: + // We strip leading whitespace from the first child element in a tag. 
+ // This is more natural w.r.t. the S-exp syntax. + let mut rest = rest.to_vec(); + let mut is_first_node_on_next_line = false; + if let Some(first_node) = rest.get_mut(0) { + is_first_node_on_next_line = first_node.leading_whitespace().contains('\n'); + if !is_first_node_on_next_line { + first_node.set_leading_whitespace("".to_owned()); + } + } - // Closing tag should be equally as spaced as opening tag (?) - if end_token.leading_whitespace.is_empty() { - if is_first_node_on_next_line || tag == "style" { - write!(f, "{}", leading_whitespace)?; - } - } else { - write!(f, "{}", end_token.leading_whitespace)?; - } + let xml_fmt = XMLFormatter::new(rest.to_owned().into_boxed_slice()); + let xml_fmt = Box::leak(Box::new(xml_fmt)); // FIXME: store formatter. + xml_fmt.generate(f)?; - write!(f, "</{}>", tag)?; - }, - _ => return Err(GenerationError::new("XML", - &format!("Unexpected {} node when generating.", node.node_type()), - &node.site())) + // Closing tag should be equally as spaced as opening tag (?) + if end_token.leading_whitespace.is_empty() { + if is_first_node_on_next_line || tag == "style" { + write!(f, "{}", leading_whitespace)?; + } + } else { + write!(f, "{}", end_token.leading_whitespace)?; } - } - Ok(()) + + write!(f, "</{}>", tag)?; + }, + _ => return Err(GenerationError::new("XML", + &format!("Unexpected {} node when generating.", node.node_type()), + &node.site())) + } + Ok(()) +} + +fn display_attribute<'a>(attr: &'a parser::ParseNode<'a>) -> Result<String, GenerationError> { + let parser::ParseNode::Attribute { keyword, node, .. 
} = attr else { + panic!("Passed non-attribute to display_attribute.") + }; + if let Some(symbol) = (*node).atomic() { + Ok(format!("{}=\"{}\"", keyword, symbol.value)) + } else { + Err(GenerationError::new("XML", + "Attribute can only contain symbols, numbers or strings", + &(*node).site())) } } diff --git a/src/lib.rs b/src/lib.rs @@ -9,7 +9,7 @@ use parse::{expander, parser, lexer}; use std::{fs, io, path::Path}; -pub const VERSION : (u8, u8, u8) = (0, 2, 0); +pub const VERSION: (u8, u8, u8) = (0, 2, 2); pub fn tree_builder<'a, P: AsRef<Path>>(source_path: Option<P>, string: String) -> expander::Expander<'a> {