seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam
Log | Files | Refs | README | LICENSE

commit 5be4f703571f29d8cdc06540dba0bfb32b3d005a
parent 385e056052736bf9d6e6f46fa211f561ab6ae4ca
Author: Demonstrandum <samuel@knutsen.co>
Date:   Fri, 19 Jul 2024 15:47:08 +0100

Added `(%raw)` along with a new raw-content string type.

* A string node produced by `(%raw "...")` is embedded in the
  output target format exactly as written, i.e. it is not
  escaped/transformed.
* Added a `(%string ...)` macro counterpart, turns any literal type
  (even a raw-content string) back into a regular string.
* Cleaned up behaviour of macros by conditionally stripping the
  leading whitespace from a macro's first argument (it is kept
  only when it contains a newline), which is more sensible.
  This is one of those cases where storing trailing whitespace
  instead would have been more elegant.

Diffstat:
MCargo.lock | 2+-
MCargo.toml | 2+-
MREADME.md | 12+++++++-----
Msrc/assemble/css.rs | 8++++++--
Msrc/assemble/html.rs | 13+++++++++++--
Msrc/assemble/sexp.rs | 6++++++
Msrc/assemble/text.rs | 3++-
Msrc/lib.rs | 2+-
Msrc/parse/expander.rs | 176+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Msrc/parse/parser.rs | 51++++++++++++++++++++++++++++++++++++++++++---------
10 files changed, 213 insertions(+), 62 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -168,7 +168,7 @@ dependencies = [ [[package]] name = "seam" -version = "0.2.5" +version = "0.3.0" dependencies = [ "chrono", "colored", diff --git a/Cargo.toml b/Cargo.toml @@ -4,7 +4,7 @@ description = "Symbolic Expressions As Markup." keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"] license-file = "LICENSE" homepage = "https://git.knutsen.co/seam" -version = "0.2.5" +version = "0.3.0" authors = ["Demonstrandum <samuel@knutsen.co>"] edition = "2021" diff --git a/README.md b/README.md @@ -91,21 +91,23 @@ seam --sexp <<< '(hello (%define subject world) %subject)' ``` ## Checklist - - [ ] `(%define x %body)` evaluates `%body` eagerly (at definition), + - [x] `(%define x %body)` evaluates `%body` eagerly (at definition), while `(%define (y) %body)` only evaluates `%body` per call-site `(%y)`. - [x] Namespace macro `(%namespace ns (%include "file.sex"))` will prefix all definitions in its body with `ns/`, e.g. `%ns/defn`. Allows for a customizable separator, e.g. `(%namespace ns :separator "-" ...)` will allow for writing `%ns-defn`. Otherwise, the macro leaves the content produced by the body completely unchanged. - [x] Command line `-I` include directory. - - [ ] First argument (of body) in a macro invocation should have its whitespace stripped. + - [x] First argument in a macro invocation should have its whitespace stripped. - [x] `(%os/env ENV_VAR)` environment variable macro. - - [ ] `(%to-string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros. + - [ ] Lazy evaluation for *user* macros (like in `ifdef`) with use of new `(%eval ...)` macro. + - [ ] `(%string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros. - [ ] Escape evaluating macros with `\%`. - [x] `(%format "{}")` macro with Rust's `format` syntax. e.g. `(%format "Hello {}, age {age:0>2}" "Sam" :age 9)` - - [ ] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output. 
Can also take any othe source code, for which it just embeds the expanded code (plain-text formatter). + - [x] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output. + - [ ] `(%formatter/text)` can take any other source code, for which it just embeds the expanded code (plain-text formatter). - [ ] `(%formatter/html ...)` etc. which call the respective available formatters. - [ ] Implement lexical scope by letting macros store a copy of the scope they were defined in (or a reference?). - - [ ] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string. + - [x] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string. - [ ] Variadic arguments via `&rest` syntax. - [ ] Delayed evaluation of macros by `%(...)` syntax. [ ] For example `%(f x y)` is the same as `(%f x y)`, so you can have `(%define uneval f x)` and then write `%(%uneval y)`. diff --git a/src/assemble/css.rs b/src/assemble/css.rs @@ -102,10 +102,13 @@ fn convert_value<'a>(node: &'a ParseNode<'a>) -> Result<String, GenerationError< | ParseNode::Symbol(node) | ParseNode::String(node) => Ok(if node.value.chars().any(|c| c.is_whitespace()) { - format!("\"{}\"", node.value) + format!("{:?}", node.value) } else { node.value.to_owned() }), + ParseNode::Raw(node) => { + Ok(node.value.to_owned()) + }, ParseNode::Attribute { .. 
} => Err(GenerationError::new("CSS-value", "Incompatible structure (attribute) found in CSS \ property value.", @@ -265,7 +268,8 @@ impl<'a> MarkupFormatter for CSSFormatter<'a> { }, ParseNode::Symbol(node) | ParseNode::Number(node) - | ParseNode::String(node) => { + | ParseNode::String(node) + | ParseNode::Raw(node) => { let site = node.site.to_owned(); return Err(GenerationError::new("CSS", "Symbolic node not expected here, CSS documents \ diff --git a/src/assemble/html.rs b/src/assemble/html.rs @@ -45,6 +45,11 @@ impl<'a> HTMLFormatter<'a> { write!(f, "{}", node.leading_whitespace)?; write!(f, "{}", escape_xml(&node.value))?; }, + ParseNode::Raw(node) => { + // Don't escape any symbols in a raw-content string. + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", &node.value)?; + }, ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => { write!(f, "{}", leading_whitespace)?; let head = list.first(); @@ -85,9 +90,13 @@ impl<'a> HTMLFormatter<'a> { return Ok(()); } - while let Some(ParseNode::Attribute { node, keyword, .. }) = rest.first() { + while let Some(ParseNode::Attribute { node, keyword, leading_whitespace, .. }) = rest.first() { if let Some(atom) = (*node).atomic() { - write!(f, " {}=\"{}\"", keyword, atom.value)?; + let leading_whitespace + = if leading_whitespace.is_empty() + { " " } else { leading_whitespace }; + write!(f, "{}", leading_whitespace)?; + write!(f, "{}=\"{}\"", keyword, atom.value)?; rest = &rest[1..]; } else { // Error! Cannot be non atomic. diff --git a/src/assemble/sexp.rs b/src/assemble/sexp.rs @@ -41,6 +41,12 @@ impl<'a> SExpFormatter<'a> { write!(f, "{}", node.leading_whitespace)?; write!(f, "{:?}", node.value)?; }, + ParseNode::Raw(node) => { + // This is the only place we can't really expand + // the `(%raw ...)` macro. It is meaningful. + write!(f, "{}", node.leading_whitespace)?; + write!(f, "(%raw {:?})", node.value)?; + }, ParseNode::List { nodes, leading_whitespace, end_token, .. 
} => { write!(f, "{}", leading_whitespace)?; write!(f, "(")?; diff --git a/src/assemble/text.rs b/src/assemble/text.rs @@ -33,7 +33,8 @@ impl<'a> PlainTextFormatter<'a> { match node { ParseNode::Symbol(node) | ParseNode::Number(node) - | ParseNode::String(node) => { + | ParseNode::String(node) + | ParseNode::Raw(node) => { write!(f, "{}", node.leading_whitespace)?; write!(f, "{}", node.value)?; }, diff --git a/src/lib.rs b/src/lib.rs @@ -9,7 +9,7 @@ use parse::{expander, parser, lexer}; use std::{fs, io, path::Path}; -pub const VERSION: (u8, u8, u8) = (0, 2, 5); +pub const VERSION: (u8, u8, u8) = (0, 3, 0); pub fn tree_builder<'a, P: AsRef<Path>>(source_path: Option<P>, string: String) -> expander::Expander<'a> { diff --git a/src/parse/expander.rs b/src/parse/expander.rs @@ -178,11 +178,14 @@ impl<'a> Expander<'a> { }; // If head is atomic, we assign to a 'variable'. + // Aditionally, we evaluate its body *eagerly*. let def_macro = if let Some(variable) = head.atomic() { + let nodes = nodes.to_owned().into_boxed_slice(); + let body = self.expand_nodes(nodes)?; Rc::new(Macro { name: variable.value.clone(), params: Box::new([]), - body: nodes.to_owned().into_boxed_slice(), + body, }) } else { // Otherwise, we are assigning to a 'function'. let ParseNode::List { nodes: defn_nodes, .. } = head else { @@ -332,6 +335,47 @@ impl<'a> Expander<'a> { Ok(expanded_tree.into_boxed_slice()) } + fn expand_embed_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?; + let [path_node] = &*params else { + return Err(ExpansionError( + format!("Incorrect number of arguments \ + to `%embed' macro. Got {}, expected {}.", + params.len(), 1), + node.site().to_owned())); + }; + + let Some(Node { value: path, site, .. 
}) = path_node.atomic() else { + return Err(ExpansionError( + "Bad argument to `%embed' macro.\n\ + Expected a path, but did not get any value + that could be interpreted as a path.".to_string(), + path_node.site().to_owned())) + }; + + // Open file, and read contents! + let embed_error = |error: Box<dyn Display>| ExpansionError( + format!("{}", error), site.to_owned()); + let mut value: Result<String, ExpansionError> = Err( + embed_error(Box::new("No path tested."))); + // Try all include directories until one is succesful. + for include_dir in &self.includes { + let path = include_dir.join(path); + value = std::fs::read_to_string(path) + .map_err(|err| embed_error(Box::new(err))); + if value.is_ok() { break; } + } + let value = value?; + Ok(Box::new([ + ParseNode::String(Node { + value, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }), + ])) + } + fn expand_date_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) -> Result<ParseTree<'a>, ExpansionError<'a>> { let params = self.expand_nodes(params)?; @@ -456,43 +500,6 @@ impl<'a> Expander<'a> { } } - fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - // Eagerly evaluate parameters passed to macro invocation. - let params = self.expand_nodes(params)?; - - let Some(mac) = self.get_variable(name) else { - return Err(ExpansionError::new( - &format!("Macro not found (`{}').", name), &node.owned_site())) - }; - - // Instance of expansion subcontext. - let subcontext = self.create_subcontext(); - // Check enough arguments were given. - if params.len() != mac.params.len() { - return Err(ExpansionError( - format!("`%{}` macro expects {} arguments, \ - but {} were given.", &mac.name, mac.params.len(), - params.len()), node.site().to_owned())); - } - // Define arguments for body. 
- for i in 0..params.len() { - let arg_macro = Macro { - name: mac.params[i].to_owned(), - params: Box::new([]), - body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site. - }; - subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro)); - } - // Expand body. - let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec(); - // Inherit leading whitespace of invocation. - if let Some(first_node) = expanded.get_mut(0) { - first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); - } - Ok(expanded.into_boxed_slice()) - } - fn expand_namespace_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) -> Result<ParseTree<'a>, ExpansionError<'a>> { // Start evaluating all the arguments to the macro in a separate context. @@ -533,6 +540,87 @@ impl<'a> Expander<'a> { Ok(args.cloned().collect()) } + fn expand_raw_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let mut builder = String::new(); + let args = self.expand_nodes(params)?; + for arg in args { + let Some(Node { value, leading_whitespace, .. }) = arg.atomic() else { + return Err(ExpansionError( + format!("Expected a literal, found a {} node instead.", arg.node_type()), + arg.owned_site())); + }; + builder += leading_whitespace; + builder += value; + } + Ok(Box::new([ + ParseNode::Raw(Node { + value: builder, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }) + ])) + } + + fn expand_string_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let mut builder = String::new(); + let args = self.expand_nodes(params)?; + for arg in args { + let Some(Node { value, leading_whitespace, .. 
}) = arg.atomic() else { + return Err(ExpansionError( + format!("Expected a literal, found a {} node instead.", arg.node_type()), + arg.owned_site())); + }; + builder += leading_whitespace; + builder += value; + } + Ok(Box::new([ + ParseNode::String(Node { + value: builder, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }) + ])) + } + + fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Eagerly evaluate parameters passed to macro invocation. + let params = self.expand_nodes(params)?; + + let Some(mac) = self.get_variable(name) else { + return Err(ExpansionError::new( + &format!("Macro not found (`{}').", name), &node.owned_site())) + }; + + // Instance of expansion subcontext. + let subcontext = self.create_subcontext(); + // Check enough arguments were given. + if params.len() != mac.params.len() { + return Err(ExpansionError( + format!("`%{}` macro expects {} arguments, \ + but {} were given.", &mac.name, mac.params.len(), + params.len()), node.site().to_owned())); + } + // Define arguments for body. + for i in 0..params.len() { + let arg_macro = Macro { + name: mac.params[i].to_owned(), + params: Box::new([]), + body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site. + }; + subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro)); + } + // Expand body. + let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec(); + // Inherit leading whitespace of invocation. + if let Some(first_node) = expanded.get_mut(0) { + first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); + } + Ok(expanded.into_boxed_slice()) + } + fn expand_invocation(&self, name: &str, //< Name of macro (e.g. %define). node: &ParseNode<'a>, //< Node for `%'-macro invocation. 
@@ -543,7 +631,10 @@ impl<'a> Expander<'a> { match name { "define" => self.expand_define_macro(node, params), "ifdef" => self.expand_ifdef_macro(node, params), + "raw" => self.expand_raw_macro(node, params), + "string" => self.expand_string_macro(node, params), "include" => self.expand_include_macro(node, params), + "embed" => self.expand_embed_macro(node, params), "namespace" => self.expand_namespace_macro(node, params), "date" => self.expand_date_macro(node, params), "log" => self.expand_log_macro(node, params), @@ -592,8 +683,13 @@ impl<'a> Expander<'a> { if name.starts_with("%") { // Rebuild node... let name = &name[1..]; - // Clean macro arguments from whitespace tokens. - let params: Vec<ParseNode> = call.collect(); + let mut params: Vec<ParseNode> = call.collect(); + // Delete leading whitespace of leading argument. + if let Some(leading) = params.first_mut() { + if !leading.leading_whitespace().contains('\n') { + leading.set_leading_whitespace(String::from("")); + } + } return self.expand_invocation(name, node, params.into_boxed_slice()); } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs @@ -4,6 +4,9 @@ use descape::UnescapeExt; use super::{lexer::{LexError, Lexer}, tokens::{Kind, Site, Token}}; +/// The [`Node`] type represents what atomic/literals are parsed +/// into; i.e. not compound types (e.g. lists, attributes). +/// These are just a common storage for the literals in [`ParseNode`]. #[derive(Debug, Clone)] pub struct Node<'a> { pub value: String, @@ -21,11 +24,15 @@ impl<'a> Node<'a> { } } +/// Parse nodes are the components of the syntax tree that +/// the source code is translated into. +/// These nodes are also produced at compile-time by the macro expander. #[derive(Debug, Clone)] pub enum ParseNode<'a> { Symbol(Node<'a>), Number(Node<'a>), String(Node<'a>), + Raw(Node<'a>), //< Raw-content strings are not parsed, only expanded by macros. 
List { nodes: Box<[ParseNode<'a>]>, site: Site<'a>, @@ -41,83 +48,109 @@ pub enum ParseNode<'a> { } impl<'a> ParseNode<'a> { + /// Unwrap a literal node if it is a symbol or number. pub fn symbolic(&self) -> Option<&Node<'a>> { match self { Self::Symbol(ref node) | Self::Number(ref node) => Some(node), - _ => None + _ => None, } } + /// Unwrap string-like nodes. + pub fn string(&self) -> Option<&Node<'a>> { + match self { + Self::String(ref node) | Self::Raw(ref node) => Some(node), + _ => None, + } + } + + /// Unwrap literal (atomic) nodes into their underlying [`Node`]. pub fn atomic(&self) -> Option<&Node<'a>> { match self { Self::Symbol(ref node) | Self::Number(ref node) - | Self::String(ref node) => Some(node), - _ => None + | Self::String(ref node) + | Self::Raw(ref node) => Some(node), + _ => None, } } + /// Same as [`Self::atomic`], but consumes the node, + /// returning an owned [`Node`]. pub fn into_atomic(self) -> Option<Node<'a>> { match self { Self::Symbol(node) | Self::Number(node) | Self::String(node) => Some(node), - _ => None + _ => None, } } + /// Get a reference to the parse node's underlying [`Site`]. pub fn site(&self) -> &Site<'a> { match self { Self::Symbol(ref node) | Self::Number(ref node) - | Self::String(ref node) => &node.site, + | Self::String(ref node) + | Self::Raw(ref node) => &node.site, Self::List { ref site, .. } => site, Self::Attribute { ref site, .. } => site, } } + /// Clone the underlying [`Site`] of this parse node. pub fn owned_site(&self) -> Site<'a> { match self { Self::Symbol(node) | Self::Number(node) - | Self::String(node) => node.site.clone(), + | Self::String(node) + | Self::Raw(node) => node.site.clone(), Self::List { site, .. } => site.clone(), Self::Attribute { site, .. } => site.clone(), } } + /// Get a reference to the underlying leading whitespace string + /// of this parse node. 
pub fn leading_whitespace(&self) -> &str { match self { Self::Symbol(ref node) | Self::Number(ref node) - | Self::String(ref node) => &node.leading_whitespace, + | Self::String(ref node) + | Self::Raw(ref node) => &node.leading_whitespace, Self::List { ref leading_whitespace, .. } => leading_whitespace, Self::Attribute { ref leading_whitespace, .. } => leading_whitespace, } } + /// Modify the underlying leading whitespace stored for this parse node. pub fn set_leading_whitespace(&mut self, whitespace: String) { match self { Self::Symbol(ref mut node) | Self::Number(ref mut node) - | Self::String(ref mut node) => node.leading_whitespace = whitespace, + | Self::String(ref mut node) + | Self::Raw(ref mut node) => node.leading_whitespace = whitespace, Self::List { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, Self::Attribute { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, }; } + /// Get a `&'static str` string name of what type of parse node this is. pub fn node_type(&self) -> &'static str { match self { Self::Symbol(..) => "symbol", Self::Number(..) => "number", Self::String(..) => "string", + Self::Raw(..) => "raw-content string", Self::List { .. } => "list", Self::Attribute { .. } => "attribute", } } } +/// An array of parse nodes, like in a [`ParseNode::List`], never grows. +/// Hence we prefer the `Box<[...]>` representation over a `Vec<...>`. pub type ParseTree<'a> = Box<[ParseNode<'a>]>; #[derive(Debug, Clone)] @@ -333,7 +366,7 @@ impl<'a> SearchTree<'a> for ParseNode<'a> { None } }, - ParseNode::String(name) => { + ParseNode::String(name) | ParseNode::Raw(name) => { if kind.is_a(SearchType::String) && is_equal(&name.value) { Some(self) } else {