commit 5be4f703571f29d8cdc06540dba0bfb32b3d005a
parent 385e056052736bf9d6e6f46fa211f561ab6ae4ca
Author: Demonstrandum <samuel@knutsen.co>
Date:   Fri, 19 Jul 2024 15:47:08 +0100
Added `(%raw)` along with a new raw-content string type.
* A string node that is `(%raw "...")` embeds in the outputted target
  format unchanged from the original string, i.e. is not
  escaped/transformed.
* Added a `(%string ...)` macro counterpart, turns any literal type
  (even a raw-content string) back into a regular string.
* Cleaned up behaviour of macros by conditionally stripping the
  leading whitespace from a macro's first argument, which is more
  sensible.  This is one of those cases where storing trailing
  whitespace instead would have been more elegant.
Diffstat:
10 files changed, 213 insertions(+), 62 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -168,7 +168,7 @@ dependencies = [
 
 [[package]]
 name = "seam"
-version = "0.2.5"
+version = "0.3.0"
 dependencies = [
  "chrono",
  "colored",
diff --git a/Cargo.toml b/Cargo.toml
@@ -4,7 +4,7 @@ description = "Symbolic Expressions As Markup."
 keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"]
 license-file = "LICENSE"
 homepage = "https://git.knutsen.co/seam"
-version = "0.2.5"
+version = "0.3.0"
 authors = ["Demonstrandum <samuel@knutsen.co>"]
 edition = "2021"
 
diff --git a/README.md b/README.md
@@ -91,21 +91,23 @@ seam --sexp <<< '(hello (%define subject world) %subject)'
 ```
 
 ## Checklist
- - [ ] `(%define x %body)` evaluates `%body` eagerly (at definition),
+ - [x] `(%define x %body)` evaluates `%body` eagerly (at definition),
        while `(%define (y) %body)` only evaluates `%body` per call-site `(%y)`.
  - [x] Namespace macro `(%namespace ns (%include "file.sex"))` will prefix all definitions in its body with `ns/`, e.g. `%ns/defn`.
        Allows for a customizable separator, e.g. `(%namespace ns :separator "-" ...)` will allow for writing `%ns-defn`.
        Otherwise, the macro leaves the content produced by the body completely unchanged.
  - [x] Command line `-I` include directory.
- - [ ] First argument (of body) in a macro invocation should have its whitespace stripped.
+ - [x] First argument in a macro invocation should have its whitespace stripped.
  - [x] `(%os/env ENV_VAR)` environment variable macro.
- - [ ] `(%to-string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros.
+ - [ ] Lazy evaluation for *user* macros (like in `ifdef`) with use of new `(%eval ...)` macro.
+ - [ ] `(%string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros.
  - [ ] Escape evaluating macros with `\%`.
  - [x] `(%format "{}")` macro with Rust's `format` syntax. e.g. `(%format "Hello {}, age {age:0>2}" "Sam" :age 9)`
- - [ ] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output.  Can also take any othe source code, for which it just embeds the expanded code (plain-text formatter).
+ - [x] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output.
+ - [ ] `(%formatter/text)` can take any other source code, for which it just embeds the expanded code (plain-text formatter).
  - [ ] `(%formatter/html ...)` etc. which call the respective available formatters.
  - [ ] Implement lexical scope by letting macros store a copy of the scope they were defined in (or a reference?).
- - [ ] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string.
+ - [x] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string.
  - [ ] Variadic arguments via `&rest` syntax.
  - [ ] Delayed evaluation of macros by `%(...)` syntax.
    [ ] For example `%(f x y)` is the same as `(%f x y)`, so you can have `(%define uneval f x)` and then write `%(%uneval y)`.
diff --git a/src/assemble/css.rs b/src/assemble/css.rs
@@ -102,10 +102,13 @@ fn convert_value<'a>(node: &'a ParseNode<'a>) -> Result<String, GenerationError<
         | ParseNode::Symbol(node)
         | ParseNode::String(node) =>
             Ok(if node.value.chars().any(|c| c.is_whitespace()) {
-                format!("\"{}\"", node.value)
+                format!("{:?}", node.value)
             } else {
                 node.value.to_owned()
             }),
+        ParseNode::Raw(node) => {
+            Ok(node.value.to_owned())
+        },
         ParseNode::Attribute { .. } => Err(GenerationError::new("CSS-value",
                 "Incompatible structure (attribute) found in CSS \
                  property value.",
@@ -265,7 +268,8 @@ impl<'a> MarkupFormatter for CSSFormatter<'a> {
                 },
                 ParseNode::Symbol(node)
                 | ParseNode::Number(node)
-                | ParseNode::String(node) => {
+                | ParseNode::String(node)
+                | ParseNode::Raw(node) => {
                     let site = node.site.to_owned();
                     return Err(GenerationError::new("CSS",
                         "Symbolic node not expected here, CSS documents \
diff --git a/src/assemble/html.rs b/src/assemble/html.rs
@@ -45,6 +45,11 @@ impl<'a> HTMLFormatter<'a> {
                 write!(f, "{}", node.leading_whitespace)?;
                 write!(f, "{}", escape_xml(&node.value))?;
             },
+            ParseNode::Raw(node) => {
+                // Don't escape any symbols in a raw-content string.
+                write!(f, "{}", node.leading_whitespace)?;
+                write!(f, "{}", &node.value)?;
+            },
             ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => {
                 write!(f, "{}", leading_whitespace)?;
                 let head = list.first();
@@ -85,9 +90,13 @@ impl<'a> HTMLFormatter<'a> {
                     return Ok(());
                 }
 
-                while let Some(ParseNode::Attribute { node, keyword, .. }) = rest.first() {
+                while let Some(ParseNode::Attribute { node, keyword, leading_whitespace, .. }) = rest.first() {
                     if let Some(atom) = (*node).atomic() {
-                        write!(f, " {}=\"{}\"", keyword, atom.value)?;
+                        let leading_whitespace
+                            = if leading_whitespace.is_empty()
+                              { " " } else { leading_whitespace };
+                        write!(f, "{}", leading_whitespace)?;
+                        write!(f, "{}=\"{}\"", keyword, atom.value)?;
                         rest = &rest[1..];
                     } else {
                         // Error! Cannot be non atomic.
diff --git a/src/assemble/sexp.rs b/src/assemble/sexp.rs
@@ -41,6 +41,12 @@ impl<'a> SExpFormatter<'a> {
                 write!(f, "{}", node.leading_whitespace)?;
                 write!(f, "{:?}", node.value)?;
             },
+            ParseNode::Raw(node) => {
+                // This is the only place we can't really expand
+                // the `(%raw ...)` macro. It is meaningful.
+                write!(f, "{}", node.leading_whitespace)?;
+                write!(f, "(%raw {:?})", node.value)?;
+            },
             ParseNode::List { nodes, leading_whitespace, end_token, .. } => {
                 write!(f, "{}", leading_whitespace)?;
                 write!(f, "(")?;
diff --git a/src/assemble/text.rs b/src/assemble/text.rs
@@ -33,7 +33,8 @@ impl<'a> PlainTextFormatter<'a> {
         match node {
             ParseNode::Symbol(node)
           | ParseNode::Number(node)
-          | ParseNode::String(node) => {
+          | ParseNode::String(node)
+          | ParseNode::Raw(node) => {
                 write!(f, "{}", node.leading_whitespace)?;
                 write!(f, "{}", node.value)?;
             },
diff --git a/src/lib.rs b/src/lib.rs
@@ -9,7 +9,7 @@ use parse::{expander, parser, lexer};
 
 use std::{fs, io, path::Path};
 
-pub const VERSION: (u8, u8, u8) = (0, 2, 5);
+pub const VERSION: (u8, u8, u8) = (0, 3, 0);
 
 pub fn tree_builder<'a, P: AsRef<Path>>(source_path: Option<P>, string: String)
     -> expander::Expander<'a> {
diff --git a/src/parse/expander.rs b/src/parse/expander.rs
@@ -178,11 +178,14 @@ impl<'a> Expander<'a> {
         };
 
         // If head is atomic, we assign to a 'variable'.
+        // Additionally, we evaluate its body *eagerly*.
         let def_macro = if let Some(variable) = head.atomic() {
+            let nodes = nodes.to_owned().into_boxed_slice();
+            let body = self.expand_nodes(nodes)?;
             Rc::new(Macro {
                 name: variable.value.clone(),
                 params: Box::new([]),
-                body: nodes.to_owned().into_boxed_slice(),
+                body,
             })
         } else {  // Otherwise, we are assigning to a 'function'.
             let ParseNode::List { nodes: defn_nodes, .. } = head else {
@@ -332,6 +335,47 @@ impl<'a> Expander<'a> {
         Ok(expanded_tree.into_boxed_slice())
     }
 
+    /// Expand `(%embed "path")` into one string node holding the file's contents.
+    /// Each include directory is tried in turn: the first readable file wins,
+    /// otherwise the error from the last attempt is returned.
+    fn expand_embed_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?;
+        let [path_node] = &*params else {
+            return Err(ExpansionError(format!(
+                "Incorrect number of arguments to `%embed' macro. Got {}, expected {}.",
+                params.len(), 1), node.site().to_owned()));
+        };
+
+        let Some(Node { value: path, site, .. }) = path_node.atomic() else {
+            return Err(ExpansionError(
+                "Bad argument to `%embed' macro.\n\
+                    Expected a path, but did not get any value \
+                    that could be interpreted as a path.".to_string(),
+                path_node.site().to_owned()))
+        };
+
+        // Read failures are reported at the site of the path argument.
+        let embed_error = |error: Box<dyn Display>| ExpansionError(
+            error.to_string(), site.to_owned());
+        // Placeholder, only returned when there is no include directory to try.
+        let mut value: Result<String, ExpansionError> = Err(
+            embed_error(Box::new("No path tested.")));
+        // Try all include directories until one is successful.
+        for include_dir in &self.includes {
+            let path = include_dir.join(path);
+            value = std::fs::read_to_string(path)
+                .map_err(|err| embed_error(Box::new(err)));
+            if value.is_ok() { break; }
+        }
+        let value = value?;
+        Ok(Box::new([ParseNode::String(Node {
+            value,
+            site: node.owned_site(),
+            leading_whitespace: node.leading_whitespace().to_owned(),
+        })]))
+    }
+
     fn expand_date_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>)
     -> Result<ParseTree<'a>, ExpansionError<'a>> {
         let params = self.expand_nodes(params)?;
@@ -456,43 +500,6 @@ impl<'a> Expander<'a> {
         }
     }
 
-    fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>)
-    -> Result<ParseTree<'a>, ExpansionError<'a>> {
-        // Eagerly evaluate parameters passed to macro invocation.
-        let params = self.expand_nodes(params)?;
-
-        let Some(mac) = self.get_variable(name) else {
-            return Err(ExpansionError::new(
-                &format!("Macro not found (`{}').", name), &node.owned_site()))
-        };
-
-        // Instance of expansion subcontext.
-        let subcontext = self.create_subcontext();
-        // Check enough arguments were given.
-        if params.len() != mac.params.len() {
-            return Err(ExpansionError(
-                format!("`%{}` macro expects {} arguments, \
-                        but {} were given.", &mac.name, mac.params.len(),
-                        params.len()), node.site().to_owned()));
-        }
-        // Define arguments for body.
-        for i in 0..params.len() {
-            let arg_macro = Macro {
-                name: mac.params[i].to_owned(),
-                params: Box::new([]),
-                body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site.
-            };
-            subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro));
-        }
-        // Expand body.
-        let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec();
-        // Inherit leading whitespace of invocation.
-        if let Some(first_node) = expanded.get_mut(0) {
-            first_node.set_leading_whitespace(node.leading_whitespace().to_owned());
-        }
-        Ok(expanded.into_boxed_slice())
-    }
-
     fn expand_namespace_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>)
     -> Result<ParseTree<'a>, ExpansionError<'a>> {
         // Start evaluating all the arguments to the macro in a separate context.
@@ -533,6 +540,87 @@ impl<'a> Expander<'a> {
         Ok(args.cloned().collect())
     }
 
+    /// Expand `(%raw ...)`: join the expanded literal arguments, each preceded by
+    /// its own leading whitespace, into a single raw-content string node.
+    fn expand_raw_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        let mut content = String::new();
+        for argument in self.expand_nodes(params)?.iter() {
+            let Some(literal) = argument.atomic() else {
+                return Err(ExpansionError(
+                    format!("Expected a literal, found a {} node instead.", argument.node_type()),
+                    argument.owned_site()));
+            };
+            content.push_str(&literal.leading_whitespace);
+            content.push_str(&literal.value);
+        }
+        let raw = Node {
+            value: content,
+            site: node.owned_site(),
+            leading_whitespace: node.leading_whitespace().to_owned(),
+        };
+        Ok(Box::new([ParseNode::Raw(raw)]))
+    }
+
+    /// Expand `(%string ...)`: join the expanded literal arguments, each preceded
+    /// by its own leading whitespace, into a single regular string node.
+    fn expand_string_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        let mut text = String::new();
+        for argument in self.expand_nodes(params)?.iter() {
+            let Some(literal) = argument.atomic() else {
+                return Err(ExpansionError(
+                    format!("Expected a literal, found a {} node instead.", argument.node_type()),
+                    argument.owned_site()));
+            };
+            text.push_str(&literal.leading_whitespace);
+            text.push_str(&literal.value);
+        }
+        let string = Node {
+            value: text,
+            site: node.owned_site(),
+            leading_whitespace: node.leading_whitespace().to_owned(),
+        };
+        Ok(Box::new([ParseNode::String(string)]))
+    }
+
+    /// Expand an invocation of the macro stored under `name`: bind the eagerly
+    /// expanded arguments to its parameters inside a subcontext, and expand
+    /// the macro body there.
+    fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        // Arguments are expanded at the call-site, before the macro is looked up.
+        let arguments = self.expand_nodes(params)?;
+        let Some(mac) = self.get_variable(name) else {
+            return Err(ExpansionError::new(
+                &format!("Macro not found (`{}').", name), &node.owned_site()))
+        };
+        let subcontext = self.create_subcontext();
+        // The argument count has to match the parameter count exactly.
+        let (expected, given) = (mac.params.len(), arguments.len());
+        if given != expected {
+            return Err(ExpansionError(
+                format!("`%{}` macro expects {} arguments, \
+                        but {} were given.", &mac.name, expected, given),
+                node.site().to_owned()));
+        }
+        // Bind every parameter to its argument as a parameterless macro.
+        for (parameter, argument) in mac.params.iter().zip(arguments.iter()) {
+            let binding = Macro {
+                name: parameter.to_owned(),
+                params: Box::new([]),
+                body: Box::new([argument.clone()]),
+            };
+            subcontext.insert_variable(parameter.to_string(), Rc::new(binding));
+        }
+        let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec();
+        // The expansion takes over the leading whitespace of the invocation.
+        if let Some(first_node) = expanded.first_mut() {
+            first_node.set_leading_whitespace(node.leading_whitespace().to_owned());
+        }
+        Ok(expanded.into_boxed_slice())
+    }
+
     fn expand_invocation(&self,
                          name: &str, //< Name of macro (e.g. %define).
                          node: &ParseNode<'a>, //< Node for `%'-macro invocation.
@@ -543,7 +631,10 @@ impl<'a> Expander<'a> {
         match name {
             "define"    => self.expand_define_macro(node, params),
             "ifdef"     => self.expand_ifdef_macro(node, params),
+            "raw"       => self.expand_raw_macro(node, params),
+            "string"    => self.expand_string_macro(node, params),
             "include"   => self.expand_include_macro(node, params),
+            "embed"     => self.expand_embed_macro(node, params),
             "namespace" => self.expand_namespace_macro(node, params),
             "date"      => self.expand_date_macro(node, params),
             "log"       => self.expand_log_macro(node, params),
@@ -592,8 +683,13 @@ impl<'a> Expander<'a> {
                     if name.starts_with("%") {
                         // Rebuild node...
                         let name = &name[1..];
-                        // Clean macro arguments from whitespace tokens.
-                        let params: Vec<ParseNode> = call.collect();
+                        let mut params: Vec<ParseNode> = call.collect();
+                        // Delete leading whitespace of leading argument.
+                        if let Some(leading) = params.first_mut() {
+                            if !leading.leading_whitespace().contains('\n') {
+                                leading.set_leading_whitespace(String::from(""));
+                            }
+                        }
                         return self.expand_invocation(name, node, params.into_boxed_slice());
                     }
                 }
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
@@ -4,6 +4,9 @@ use descape::UnescapeExt;
 
 use super::{lexer::{LexError, Lexer}, tokens::{Kind, Site, Token}};
 
+/// The [`Node`] type represents what atomic/literals are parsed
+/// into; i.e. not compound types (e.g. lists, attributes).
+/// These are just a common storage for the literals in [`ParseNode`].
 #[derive(Debug, Clone)]
 pub struct Node<'a> {
     pub value: String,
@@ -21,11 +24,15 @@ impl<'a> Node<'a> {
     }
 }
 
+/// Parse nodes are the components of the syntax tree that
+/// the source code is translated into.
+/// These nodes are also produced at compile-time by the macro expander.
 #[derive(Debug, Clone)]
 pub enum ParseNode<'a> {
     Symbol(Node<'a>),
     Number(Node<'a>),
     String(Node<'a>),
+    Raw(Node<'a>), //< Raw-content strings are not parsed, only expanded by macros.
     List {
         nodes: Box<[ParseNode<'a>]>,
         site: Site<'a>,
@@ -41,83 +48,109 @@ pub enum ParseNode<'a> {
 }
 
 impl<'a> ParseNode<'a> {
+    /// Unwrap a literal node if it is a symbol or number.
     pub fn symbolic(&self) -> Option<&Node<'a>> {
         match self {
             Self::Symbol(ref node)
             | Self::Number(ref node) => Some(node),
-            _ => None
+            _ => None,
         }
     }
 
+    /// Borrow the inner [`Node`] of a regular or raw-content string node.
+    pub fn string(&self) -> Option<&Node<'a>> {
+        if let Self::String(inner) | Self::Raw(inner) = self {
+            return Some(inner);
+        }
+        None
+    }
+
+    /// Unwrap literal (atomic) nodes into their underlying [`Node`].
     pub fn atomic(&self) -> Option<&Node<'a>> {
         match self {
             Self::Symbol(ref node)
             | Self::Number(ref node)
-            | Self::String(ref node) => Some(node),
-            _ => None
+            | Self::String(ref node)
+            | Self::Raw(ref node) => Some(node),
+            _ => None,
         }
     }
 
+    /// Same as [`Self::atomic`], but consumes the node, returning an owned
+    /// [`Node`] for every literal kind (raw-content strings included).
     pub fn into_atomic(self) -> Option<Node<'a>> {
         match self {
             Self::Symbol(node)
             | Self::Number(node)
-            | Self::String(node) => Some(node),
-            _ => None
+            | Self::String(node) | Self::Raw(node) => Some(node),
+            _ => None,
         }
     }
 
+    /// Get a reference to the parse node's underlying [`Site`].
     pub fn site(&self) -> &Site<'a> {
         match self {
             Self::Symbol(ref node)
             | Self::Number(ref node)
-            | Self::String(ref node) => &node.site,
+            | Self::String(ref node)
+            | Self::Raw(ref node) => &node.site,
             Self::List { ref site, .. } => site,
             Self::Attribute { ref site, .. } => site,
         }
     }
 
+    /// Clone the underlying [`Site`] of this parse node.
     pub fn owned_site(&self) -> Site<'a> {
         match self {
             Self::Symbol(node)
             | Self::Number(node)
-            | Self::String(node) => node.site.clone(),
+            | Self::String(node)
+            | Self::Raw(node) => node.site.clone(),
             Self::List { site, .. } => site.clone(),
             Self::Attribute { site, .. } => site.clone(),
         }
     }
 
+    /// Get a reference to the underlying leading whitespace string
+    /// of this parse node.
     pub fn leading_whitespace(&self) -> &str {
         match self {
             Self::Symbol(ref node)
             | Self::Number(ref node)
-            | Self::String(ref node) => &node.leading_whitespace,
+            | Self::String(ref node)
+            | Self::Raw(ref node) => &node.leading_whitespace,
             Self::List { ref leading_whitespace, .. } => leading_whitespace,
             Self::Attribute { ref leading_whitespace, .. } => leading_whitespace,
         }
     }
 
+    /// Modify the underlying leading whitespace stored for this parse node.
     pub fn set_leading_whitespace(&mut self, whitespace: String) {
         match self {
             Self::Symbol(ref mut node)
             | Self::Number(ref mut node)
-            | Self::String(ref mut node) => node.leading_whitespace = whitespace,
+            | Self::String(ref mut node)
+            | Self::Raw(ref mut node) => node.leading_whitespace = whitespace,
             Self::List { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace,
             Self::Attribute { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace,
         };
     }
 
+    /// Get a `&'static str` string name of what type of parse node this is.
     pub fn node_type(&self) -> &'static str {
         match self {
             Self::Symbol(..) => "symbol",
             Self::Number(..) => "number",
             Self::String(..) => "string",
+            Self::Raw(..) => "raw-content string",
             Self::List { .. } => "list",
             Self::Attribute { .. } => "attribute",
         }
     }
 }
 
+/// An array of parse nodes, like in a [`ParseNode::List`], never grows.
+/// Hence we prefer the `Box<[...]>` representation over a `Vec<...>`.
 pub type ParseTree<'a> = Box<[ParseNode<'a>]>;
 
 #[derive(Debug, Clone)]
@@ -333,7 +366,7 @@ impl<'a> SearchTree<'a> for ParseNode<'a> {
                     None
                 }
             },
-            ParseNode::String(name) => {
+            ParseNode::String(name) | ParseNode::Raw(name) => {
                 if kind.is_a(SearchType::String) && is_equal(&name.value) {
                     Some(self)
                 } else {