commit 5be4f703571f29d8cdc06540dba0bfb32b3d005a
parent 385e056052736bf9d6e6f46fa211f561ab6ae4ca
Author: Demonstrandum <samuel@knutsen.co>
Date: Fri, 19 Jul 2024 15:47:08 +0100
Added `(%raw)` along with a new raw-content string type.
* A string node that is `(%raw "...")` embeds in the outputted target
format unchanged from the original string, i.e. is not
escaped/transformed.
* Added a `(%string ...)` macro counterpart, turns any literal type
(even a raw-content string) back into a regular string.
* Cleaned up behaviour of macros by conditionally stripping the
leading whitespace from a macro's first argument, which is more
sensible. This is one of those cases where storing trailing
whitespace instead would have been more elegant.
Diffstat:
10 files changed, 213 insertions(+), 62 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -168,7 +168,7 @@ dependencies = [
[[package]]
name = "seam"
-version = "0.2.5"
+version = "0.3.0"
dependencies = [
"chrono",
"colored",
diff --git a/Cargo.toml b/Cargo.toml
@@ -4,7 +4,7 @@ description = "Symbolic Expressions As Markup."
keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"]
license-file = "LICENSE"
homepage = "https://git.knutsen.co/seam"
-version = "0.2.5"
+version = "0.3.0"
authors = ["Demonstrandum <samuel@knutsen.co>"]
edition = "2021"
diff --git a/README.md b/README.md
@@ -91,21 +91,23 @@ seam --sexp <<< '(hello (%define subject world) %subject)'
```
## Checklist
- - [ ] `(%define x %body)` evaluates `%body` eagerly (at definition),
+ - [x] `(%define x %body)` evaluates `%body` eagerly (at definition),
while `(%define (y) %body)` only evaluates `%body` per call-site `(%y)`.
- [x] Namespace macro `(%namespace ns (%include "file.sex"))` will prefix all definitions in its body with `ns/`, e.g. `%ns/defn`.
Allows for a customizable separator, e.g. `(%namespace ns :separator "-" ...)` will allow for writing `%ns-defn`.
Otherwise, the macro leaves the content produced by the body completely unchanged.
- [x] Command line `-I` include directory.
- - [ ] First argument (of body) in a macro invocation should have its whitespace stripped.
+ - [x] First argument in a macro invocation should have its whitespace stripped.
- [x] `(%os/env ENV_VAR)` environment variable macro.
- - [ ] `(%to-string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros.
+ - [ ] Lazy evaluation for *user* macros (like in `ifdef`) with use of new `(%eval ...)` macro.
+ - [ ] `(%string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros.
- [ ] Escape evaluating macros with `\%`.
- [x] `(%format "{}")` macro with Rust's `format` syntax. e.g. `(%format "Hello {}, age {age:0>2}" "Sam" :age 9)`
- - [ ] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output. Can also take any othe source code, for which it just embeds the expanded code (plain-text formatter).
+ - [x] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output.
+ - [ ] `(%formatter/text)` can take any other source code, for which it just embeds the expanded code (plain-text formatter).
- [ ] `(%formatter/html ...)` etc. which call the respective available formatters.
- [ ] Implement lexical scope by letting macros store a copy of the scope they were defined in (or a reference?).
- - [ ] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string.
+ - [x] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string.
- [ ] Variadic arguments via `&rest` syntax.
- [ ] Delayed evaluation of macros by `%(...)` syntax.
[ ] For example `%(f x y)` is the same as `(%f x y)`, so you can have `(%define uneval f x)` and then write `%(%uneval y)`.
diff --git a/src/assemble/css.rs b/src/assemble/css.rs
@@ -102,10 +102,13 @@ fn convert_value<'a>(node: &'a ParseNode<'a>) -> Result<String, GenerationError<
| ParseNode::Symbol(node)
| ParseNode::String(node) =>
Ok(if node.value.chars().any(|c| c.is_whitespace()) {
- format!("\"{}\"", node.value)
+ format!("{:?}", node.value)
} else {
node.value.to_owned()
}),
+ ParseNode::Raw(node) => {
+ Ok(node.value.to_owned())
+ },
ParseNode::Attribute { .. } => Err(GenerationError::new("CSS-value",
"Incompatible structure (attribute) found in CSS \
property value.",
@@ -265,7 +268,8 @@ impl<'a> MarkupFormatter for CSSFormatter<'a> {
},
ParseNode::Symbol(node)
| ParseNode::Number(node)
- | ParseNode::String(node) => {
+ | ParseNode::String(node)
+ | ParseNode::Raw(node) => {
let site = node.site.to_owned();
return Err(GenerationError::new("CSS",
"Symbolic node not expected here, CSS documents \
diff --git a/src/assemble/html.rs b/src/assemble/html.rs
@@ -45,6 +45,11 @@ impl<'a> HTMLFormatter<'a> {
write!(f, "{}", node.leading_whitespace)?;
write!(f, "{}", escape_xml(&node.value))?;
},
+ ParseNode::Raw(node) => {
+ // Don't escape any symbols in a raw-content string.
+ write!(f, "{}", node.leading_whitespace)?;
+ write!(f, "{}", &node.value)?;
+ },
ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => {
write!(f, "{}", leading_whitespace)?;
let head = list.first();
@@ -85,9 +90,13 @@ impl<'a> HTMLFormatter<'a> {
return Ok(());
}
- while let Some(ParseNode::Attribute { node, keyword, .. }) = rest.first() {
+ while let Some(ParseNode::Attribute { node, keyword, leading_whitespace, .. }) = rest.first() {
if let Some(atom) = (*node).atomic() {
- write!(f, " {}=\"{}\"", keyword, atom.value)?;
+ let leading_whitespace
+ = if leading_whitespace.is_empty()
+ { " " } else { leading_whitespace };
+ write!(f, "{}", leading_whitespace)?;
+ write!(f, "{}=\"{}\"", keyword, atom.value)?;
rest = &rest[1..];
} else {
// Error! Cannot be non atomic.
diff --git a/src/assemble/sexp.rs b/src/assemble/sexp.rs
@@ -41,6 +41,12 @@ impl<'a> SExpFormatter<'a> {
write!(f, "{}", node.leading_whitespace)?;
write!(f, "{:?}", node.value)?;
},
+ ParseNode::Raw(node) => {
+ // This is the only place we can't really expand
+ // the `(%raw ...)` macro. It is meaningful.
+ write!(f, "{}", node.leading_whitespace)?;
+ write!(f, "(%raw {:?})", node.value)?;
+ },
ParseNode::List { nodes, leading_whitespace, end_token, .. } => {
write!(f, "{}", leading_whitespace)?;
write!(f, "(")?;
diff --git a/src/assemble/text.rs b/src/assemble/text.rs
@@ -33,7 +33,8 @@ impl<'a> PlainTextFormatter<'a> {
match node {
ParseNode::Symbol(node)
| ParseNode::Number(node)
- | ParseNode::String(node) => {
+ | ParseNode::String(node)
+ | ParseNode::Raw(node) => {
write!(f, "{}", node.leading_whitespace)?;
write!(f, "{}", node.value)?;
},
diff --git a/src/lib.rs b/src/lib.rs
@@ -9,7 +9,7 @@ use parse::{expander, parser, lexer};
use std::{fs, io, path::Path};
-pub const VERSION: (u8, u8, u8) = (0, 2, 5);
+pub const VERSION: (u8, u8, u8) = (0, 3, 0);
pub fn tree_builder<'a, P: AsRef<Path>>(source_path: Option<P>, string: String)
-> expander::Expander<'a> {
diff --git a/src/parse/expander.rs b/src/parse/expander.rs
@@ -178,11 +178,14 @@ impl<'a> Expander<'a> {
};
// If head is atomic, we assign to a 'variable'.
+ // Additionally, we evaluate its body *eagerly*.
let def_macro = if let Some(variable) = head.atomic() {
+ let nodes = nodes.to_owned().into_boxed_slice();
+ let body = self.expand_nodes(nodes)?;
Rc::new(Macro {
name: variable.value.clone(),
params: Box::new([]),
- body: nodes.to_owned().into_boxed_slice(),
+ body,
})
} else { // Otherwise, we are assigning to a 'function'.
let ParseNode::List { nodes: defn_nodes, .. } = head else {
@@ -332,6 +335,47 @@ impl<'a> Expander<'a> {
Ok(expanded_tree.into_boxed_slice())
}
+    /// Expand `(%embed "path")` into a single string node holding the
+    /// contents of the file at `path`.
+    ///
+    /// The arguments are expanded first and must reduce to exactly one
+    /// literal. That literal is joined onto each include directory in turn,
+    /// and the first file that can be read is used. The resulting string
+    /// node takes the site and leading whitespace of the invocation `node`.
+    ///
+    /// # Errors
+    /// Fails when the number of arguments is not one, when the argument is
+    /// not a literal, or when no include directory yields a readable file
+    /// (the error from the last directory tried is the one reported).
+    fn expand_embed_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?;
+        let [path_node] = &*params else {
+            return Err(ExpansionError(
+                format!("Incorrect number of arguments \
+                         to `%embed' macro. Got {}, expected {}.",
+                        params.len(), 1),
+                node.site().to_owned()));
+        };
+
+        let Some(Node { value: path, site, .. }) = path_node.atomic() else {
+            // Every line break inside the literal is either an explicit `\n`
+            // or a `\` continuation, so no source indentation leaks into
+            // the message shown to the user.
+            return Err(ExpansionError(
+                "Bad argument to `%embed' macro.\n\
+                 Expected a path, but did not get any value \
+                 that could be interpreted as a path.".to_string(),
+                path_node.site().to_owned()))
+        };
+
+        // Open file, and read contents!
+        let embed_error = |error: Box<dyn Display>| ExpansionError(
+            format!("{}", error), site.to_owned());
+        // Placeholder error, only reported if there is no include directory
+        // to try at all.
+        let mut value: Result<String, ExpansionError> = Err(
+            embed_error(Box::new("No path tested.")));
+        // Try all include directories until one is successful.
+        for include_dir in &self.includes {
+            let path = include_dir.join(path);
+            value = std::fs::read_to_string(path)
+                .map_err(|err| embed_error(Box::new(err)));
+            if value.is_ok() { break; }
+        }
+        let value = value?;
+        Ok(Box::new([
+            ParseNode::String(Node {
+                value,
+                site: node.owned_site(),
+                leading_whitespace: node.leading_whitespace().to_owned(),
+            }),
+        ]))
+    }
+
+
fn expand_date_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>)
-> Result<ParseTree<'a>, ExpansionError<'a>> {
let params = self.expand_nodes(params)?;
@@ -456,43 +500,6 @@ impl<'a> Expander<'a> {
}
}
- fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>)
- -> Result<ParseTree<'a>, ExpansionError<'a>> {
- // Eagerly evaluate parameters passed to macro invocation.
- let params = self.expand_nodes(params)?;
-
- let Some(mac) = self.get_variable(name) else {
- return Err(ExpansionError::new(
- &format!("Macro not found (`{}').", name), &node.owned_site()))
- };
-
- // Instance of expansion subcontext.
- let subcontext = self.create_subcontext();
- // Check enough arguments were given.
- if params.len() != mac.params.len() {
- return Err(ExpansionError(
- format!("`%{}` macro expects {} arguments, \
- but {} were given.", &mac.name, mac.params.len(),
- params.len()), node.site().to_owned()));
- }
- // Define arguments for body.
- for i in 0..params.len() {
- let arg_macro = Macro {
- name: mac.params[i].to_owned(),
- params: Box::new([]),
- body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site.
- };
- subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro));
- }
- // Expand body.
- let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec();
- // Inherit leading whitespace of invocation.
- if let Some(first_node) = expanded.get_mut(0) {
- first_node.set_leading_whitespace(node.leading_whitespace().to_owned());
- }
- Ok(expanded.into_boxed_slice())
- }
-
fn expand_namespace_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>)
-> Result<ParseTree<'a>, ExpansionError<'a>> {
// Start evaluating all the arguments to the macro in a separate context.
@@ -533,6 +540,87 @@ impl<'a> Expander<'a> {
Ok(args.cloned().collect())
}
+    /// Expands `(%raw ...)`: glues the expanded literal arguments together
+    /// into one raw-content string node.
+    ///
+    /// Each argument contributes its leading whitespace followed by its
+    /// value. A non-literal argument aborts the expansion with an error
+    /// located at that argument. The produced node takes the site and
+    /// leading whitespace of the invocation `node`.
+    fn expand_raw_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        let expanded = self.expand_nodes(params)?;
+        let mut content = String::new();
+        for argument in expanded.iter() {
+            match argument.atomic() {
+                Some(literal) => {
+                    content.push_str(&literal.leading_whitespace);
+                    content.push_str(&literal.value);
+                },
+                None => return Err(ExpansionError(
+                    format!("Expected a literal, found a {} node instead.", argument.node_type()),
+                    argument.owned_site())),
+            }
+        }
+        let raw = Node {
+            value: content,
+            site: node.owned_site(),
+            leading_whitespace: node.leading_whitespace().to_owned(),
+        };
+        Ok(Box::new([ParseNode::Raw(raw)]))
+    }
+
+    /// Expands `(%string ...)`: concatenates the expanded literal arguments
+    /// into one regular string node.
+    ///
+    /// Every argument is appended as its leading whitespace followed by its
+    /// value; raw-content strings count as literals here as well. Any
+    /// non-literal argument yields an error pointing at that argument.
+    /// The produced node takes the site and leading whitespace of the
+    /// invocation `node`.
+    fn expand_string_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        let literals = self.expand_nodes(params)?;
+        let mut text = String::new();
+        for item in literals.iter() {
+            let Some(atom) = item.atomic() else {
+                let message = format!("Expected a literal, found a {} node instead.", item.node_type());
+                return Err(ExpansionError(message, item.owned_site()));
+            };
+            text.push_str(&atom.leading_whitespace);
+            text.push_str(&atom.value);
+        }
+        let site = node.owned_site();
+        let leading_whitespace = node.leading_whitespace().to_owned();
+        Ok(Box::new([
+            ParseNode::String(Node { value: text, site, leading_whitespace }),
+        ]))
+    }
+
+    /// Expands an invocation of the user-defined macro called `name`.
+    ///
+    /// The arguments are expanded at the call-site, then each one is bound
+    /// to the matching parameter name inside a fresh subcontext, in which
+    /// the macro body is expanded. The first node produced takes over the
+    /// leading whitespace of the invocation `node`.
+    ///
+    /// Fails when no macro named `name` is defined, or when the number of
+    /// arguments differs from the number of declared parameters.
+    fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>)
+    -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        // Arguments are evaluated before the macro is even looked up.
+        let args = self.expand_nodes(params)?;
+
+        let mac = match self.get_variable(name) {
+            Some(found) => found,
+            None => return Err(ExpansionError::new(
+                &format!("Macro not found (`{}').", name), &node.owned_site())),
+        };
+
+        // The body is expanded in its own context, so argument bindings
+        // are inserted there rather than into `self`.
+        let subcontext = self.create_subcontext();
+        // The argument count has to match the parameter count exactly.
+        let (given, expected) = (args.len(), mac.params.len());
+        if given != expected {
+            return Err(ExpansionError(
+                format!("`%{}` macro expects {} arguments, \
+                         but {} were given.", &mac.name, expected, given),
+                node.site().to_owned()));
+        }
+        // Bind every parameter to its (already expanded) argument.
+        for (param, arg) in mac.params.iter().zip(args.iter()) {
+            let binding = Macro {
+                name: param.to_string(),
+                params: Box::new([]),
+                body: Box::new([arg.clone()]),
+            };
+            subcontext.insert_variable(param.to_string(), Rc::new(binding));
+        }
+        // Expand the body with the bindings in place.
+        let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec();
+        // The result stands where the invocation stood, whitespace included.
+        if let Some(first) = expanded.first_mut() {
+            first.set_leading_whitespace(node.leading_whitespace().to_owned());
+        }
+        Ok(expanded.into_boxed_slice())
+    }
+
fn expand_invocation(&self,
name: &str, //< Name of macro (e.g. %define).
node: &ParseNode<'a>, //< Node for `%'-macro invocation.
@@ -543,7 +631,10 @@ impl<'a> Expander<'a> {
match name {
"define" => self.expand_define_macro(node, params),
"ifdef" => self.expand_ifdef_macro(node, params),
+ "raw" => self.expand_raw_macro(node, params),
+ "string" => self.expand_string_macro(node, params),
"include" => self.expand_include_macro(node, params),
+ "embed" => self.expand_embed_macro(node, params),
"namespace" => self.expand_namespace_macro(node, params),
"date" => self.expand_date_macro(node, params),
"log" => self.expand_log_macro(node, params),
@@ -592,8 +683,13 @@ impl<'a> Expander<'a> {
if name.starts_with("%") {
// Rebuild node...
let name = &name[1..];
- // Clean macro arguments from whitespace tokens.
- let params: Vec<ParseNode> = call.collect();
+ let mut params: Vec<ParseNode> = call.collect();
+ // Delete leading whitespace of leading argument.
+ if let Some(leading) = params.first_mut() {
+ if !leading.leading_whitespace().contains('\n') {
+ leading.set_leading_whitespace(String::from(""));
+ }
+ }
return self.expand_invocation(name, node, params.into_boxed_slice());
}
}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
@@ -4,6 +4,9 @@ use descape::UnescapeExt;
use super::{lexer::{LexError, Lexer}, tokens::{Kind, Site, Token}};
+/// The [`Node`] type represents what atomic/literals are parsed
+/// into; i.e. not compound types (e.g. lists, attributes).
+/// These are just a common storage for the literals in [`ParseNode`].
#[derive(Debug, Clone)]
pub struct Node<'a> {
pub value: String,
@@ -21,11 +24,15 @@ impl<'a> Node<'a> {
}
}
+/// Parse nodes are the components of the syntax tree that
+/// the source code is translated into.
+/// These nodes are also produced at compile-time by the macro expander.
#[derive(Debug, Clone)]
pub enum ParseNode<'a> {
Symbol(Node<'a>),
Number(Node<'a>),
String(Node<'a>),
+ Raw(Node<'a>), //< Raw-content strings are not parsed, only expanded by macros.
List {
nodes: Box<[ParseNode<'a>]>,
site: Site<'a>,
@@ -41,83 +48,109 @@ pub enum ParseNode<'a> {
}
impl<'a> ParseNode<'a> {
+ /// Unwrap a literal node if it is a symbol or number.
pub fn symbolic(&self) -> Option<&Node<'a>> {
match self {
Self::Symbol(ref node)
| Self::Number(ref node) => Some(node),
- _ => None
+ _ => None,
}
}
+ /// Unwrap string-like nodes.
+ pub fn string(&self) -> Option<&Node<'a>> {
+ match self {
+ Self::String(ref node) | Self::Raw(ref node) => Some(node),
+ _ => None,
+ }
+ }
+
+ /// Unwrap literal (atomic) nodes into their underlying [`Node`].
pub fn atomic(&self) -> Option<&Node<'a>> {
match self {
Self::Symbol(ref node)
| Self::Number(ref node)
- | Self::String(ref node) => Some(node),
- _ => None
+ | Self::String(ref node)
+ | Self::Raw(ref node) => Some(node),
+ _ => None,
}
}
+    /// Same as [`Self::atomic`], but consumes the node,
+    /// returning an owned [`Node`].
+    ///
+    /// Raw-content strings are literals too, so they are unwrapped here
+    /// exactly as [`Self::atomic`] unwraps them.
     pub fn into_atomic(self) -> Option<Node<'a>> {
         match self {
             Self::Symbol(node)
             | Self::Number(node)
-            | Self::String(node) => Some(node),
-            _ => None
+            | Self::String(node)
+            | Self::Raw(node) => Some(node),
+            _ => None,
         }
     }
+ /// Get a reference to the parse node's underlying [`Site`].
pub fn site(&self) -> &Site<'a> {
match self {
Self::Symbol(ref node)
| Self::Number(ref node)
- | Self::String(ref node) => &node.site,
+ | Self::String(ref node)
+ | Self::Raw(ref node) => &node.site,
Self::List { ref site, .. } => site,
Self::Attribute { ref site, .. } => site,
}
}
+ /// Clone the underlying [`Site`] of this parse node.
pub fn owned_site(&self) -> Site<'a> {
match self {
Self::Symbol(node)
| Self::Number(node)
- | Self::String(node) => node.site.clone(),
+ | Self::String(node)
+ | Self::Raw(node) => node.site.clone(),
Self::List { site, .. } => site.clone(),
Self::Attribute { site, .. } => site.clone(),
}
}
+ /// Get a reference to the underlying leading whitespace string
+ /// of this parse node.
pub fn leading_whitespace(&self) -> &str {
match self {
Self::Symbol(ref node)
| Self::Number(ref node)
- | Self::String(ref node) => &node.leading_whitespace,
+ | Self::String(ref node)
+ | Self::Raw(ref node) => &node.leading_whitespace,
Self::List { ref leading_whitespace, .. } => leading_whitespace,
Self::Attribute { ref leading_whitespace, .. } => leading_whitespace,
}
}
+ /// Modify the underlying leading whitespace stored for this parse node.
pub fn set_leading_whitespace(&mut self, whitespace: String) {
match self {
Self::Symbol(ref mut node)
| Self::Number(ref mut node)
- | Self::String(ref mut node) => node.leading_whitespace = whitespace,
+ | Self::String(ref mut node)
+ | Self::Raw(ref mut node) => node.leading_whitespace = whitespace,
Self::List { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace,
Self::Attribute { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace,
};
}
+ /// Get a `&'static str` string name of what type of parse node this is.
pub fn node_type(&self) -> &'static str {
match self {
Self::Symbol(..) => "symbol",
Self::Number(..) => "number",
Self::String(..) => "string",
+ Self::Raw(..) => "raw-content string",
Self::List { .. } => "list",
Self::Attribute { .. } => "attribute",
}
}
}
+/// An array of parse nodes, like in a [`ParseNode::List`], never grows.
+/// Hence we prefer the `Box<[...]>` representation over a `Vec<...>`.
pub type ParseTree<'a> = Box<[ParseNode<'a>]>;
#[derive(Debug, Clone)]
@@ -333,7 +366,7 @@ impl<'a> SearchTree<'a> for ParseNode<'a> {
None
}
},
- ParseNode::String(name) => {
+ ParseNode::String(name) | ParseNode::Raw(name) => {
if kind.is_a(SearchType::String) && is_equal(&name.value) {
Some(self)
} else {