seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam
Log | Files | Refs | README | LICENSE

commit 370fa67e3b9b2b017a8f48fe19d8f3050137001d
parent 81fe6acf8f40bfabc68aaea46cfd57895bcc46f1
Author: Demonstrandum <samuel@knutsen.co>
Date:   Wed,  4 Dec 2024 22:49:45 +0000

release: v0.3.0: new macros, pattern matching, performance.

all user macros now do pattern matching on their arguments, including:
- named optional arguments that may appear out of order.
- matching rigid required values.
- variadic arguments with &rest syntax.
- match against lists containing named arguments (essentially hashmaps)
  and nested variadic &-syntax.

added new macros:
- %apply
- %lambda
- %list
- %splat
- %strip
- %join
- %filter
- %map
- %embed
- %namespace
- %raw
- %string

Diffstat:
M.gitignore | 4+++-
MCargo.lock | 8++++----
MREADME.md | 30++++++++++++++----------------
Mcrates/seam/Cargo.toml | 4++--
Mcrates/seam/src/assemble/html.rs | 2+-
Mcrates/seam/src/assemble/mod.rs | 2+-
Mcrates/seam/src/parse/expander.rs | 499++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Mcrates/seam/src/parse/lexer.rs | 2+-
Mcrates/seam/src/parse/macros.rs | 264+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mcrates/seam/src/parse/parser.rs | 147++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Mcrates/seam/src/parse/tokens.rs | 9++++++++-
Mcrates/seam_argparse_proc_macro/src/lib.rs | 69+++++++++++++++++++++++++++++++++++----------------------------------
12 files changed, 810 insertions(+), 230 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,3 +1,5 @@ /target/ - **/*.rs.bk +rustc-*.txt +._* +.DS* diff --git a/Cargo.lock b/Cargo.lock @@ -85,9 +85,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "descape" -version = "1.1.3" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "214de5502a2fa31149b291f594c1cc0d3929e93c6f4be6842d3944a16a9ef336" +checksum = "7c1113b908df80c963b107424498e37fba986b424b605729d1492dfbe4b2a630" [[package]] name = "formatx" @@ -260,9 +260,9 @@ checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "wasm-bindgen" diff --git a/README.md b/README.md @@ -91,7 +91,11 @@ seam --sexp <<< '(hello (%define subject world) %subject)' ``` ## Checklist - - [ ] A *splat* operation: `(%splat (a b c))` becomes `a b c`. + - [ ] User `(%error msg)` macro for aborting compilation. + - [ ] Pattern-matching `(%match expr (pat1 ...) (pat2 ...) default)` macro. + Pattern matching is already implemented for `%define` internally. + - [x] Implement `(%strip ...)` which evaluates to the `...` without any of the leading whitespace. + - [x] Implement *splat* operation: `(%splat (a b c))` becomes `a b c`. - [x] `(%define x %body)` evaluates `%body` eagerly (at definition), while `(%define (y) %body)` only evaluates `%body` per call-site `(%y)`. - [x] Namespace macro `(%namespace ns (%include "file.sex"))` will prefix all definitions in its body with `ns/`, e.g. `%ns/defn`. @@ -101,28 +105,23 @@ seam --sexp <<< '(hello (%define subject world) %subject)' - [x] First argument in a macro invocation should have its whitespace stripped. 
- [x] `(%os/env ENV_VAR)` environment variable macro. - [ ] Lazy evaluation for *user* macros (like in `ifdef`) with use of new `(%eval ...)` macro. - - [ ] `(%string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros. - - [ ] Escape evaluating macros with `\%`. + - [x] `(%apply name x y z)` macro which is equivalent to `(%name x y z)`. + - [x] `(%lambda (x y) ...)` macro which just evaluates to an secret symbol, e.g. `__lambda0`. + used by applying `%apply`, e.g. `(%apply (%lambda (a b) b a) x y)` becomes `y x` + - [x] `(%string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros. - [x] `(%format "{}")` macro with Rust's `format` syntax. e.g. `(%format "Hello {}, age {age:0>2}" "Sam" :age 9)` - [x] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output. - [ ] `(%formatter/text)` can take any other source code, for which it just embeds the expanded code (plain-text formatter). - [ ] `(%formatter/html ...)` etc. which call the respective available formatters. - [ ] Implement lexical scope by letting macros store a copy of the scope they were defined in (or a reference?). - [x] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string. - - [ ] Variadic arguments via `&rest` syntax. - - [ ] Type-checking facilities for user macros (?). - - [ ] Delayed evaluation of macros by `%(...)` syntax. - [ ] For example `%(f x y)` is the same as `(%f x y)`, so you can have `(%define uneval f x)` and then write `%(%uneval y)`. - - [ ] `%list` macro which expands from `(p (%list a b c))` to `(p a b c)`. - Defined as such: - ```lisp - (%define (list &rest) rest) - ``` + - [x] Variadic arguments via `&rest` syntax. + - [ ] Type-checking facilities for user macros. + - [x] `%list` macro which expands from `(%list %a %b %c)` to `( %a %b %c )` but *without* calling `%a` as a macro with `%b` and `%c` as argument. - [ ] `%for`-loop macro, iterating over `%list`s. 
- [ ] `%glob` which returns a list of files/directories matching a glob. - - [ ] `%markdown` renders Markdown given to it as html. - - [ ] `%html`, `%xml`, `%css`, etc. macros which goes into the specific rendering mode. - - [ ] Add variadic and keyword macro arguments. + - [ ] `%markdown` renders Markdown given to it as `%raw` html-string. + - [ ] Add keyword macro arguments. - [ ] Caching or checking time-stamps as to not regenerate unmodified source files. - [ ] HTML object `style="..."` object should handle s-expressions well, (e.g. `(p :style (:color red :border none) Hello World)`) - [ ] Add more supported formats (`JSON`, `JS`, `TOML`, &c.). @@ -133,7 +132,6 @@ seam --sexp <<< '(hello (%define subject world) %subject)' (const f (=> (a b) (+ a b)) ((. console log) (== (f y z) x)) ``` - - [ ] Add more helpful/generic macros (e.g. `(%include ...)`, which already exists). - [ ] Allow for arbitrary embedding of code, that can be run by a LISP interpreter (or any other langauge), for example. (e.g. 
`(%chez (+ 1 2))` executes `(+ 1 2)` with Chez-Scheme LISP, and places the result in the source diff --git a/crates/seam/Cargo.toml b/crates/seam/Cargo.toml @@ -24,7 +24,7 @@ path = "src/bin.rs" seam_argparse_proc_macro = { path = "../seam_argparse_proc_macro" } colored = "2.1" chrono = "0.4" -unicode-width = "0.1.12" -descape = "1.1.2" +unicode-width = "0.2.0" +descape = "2.0.3" formatx = "0.2.2" regex = "1.10.5" diff --git a/crates/seam/src/assemble/html.rs b/crates/seam/src/assemble/html.rs @@ -148,7 +148,7 @@ impl<'a> HTMLFormatter<'a> { // - <svg> and <math> contain XML, not HTML; // - <pre>, <textarea> and <title> contain raw text, not parsed as HTML; // - <pre> will display raw text found in source code; - // - <textarea> and <title> however, are escapable (evaluete macros); + // - <textarea> and <title> however, are escapable (evaluate macros); // - <script> contains JavaScript, maybe we will parse this in the future!; // - <style> contains CSS, which we have our own parser for already. match tag.as_str() { diff --git a/crates/seam/src/assemble/mod.rs b/crates/seam/src/assemble/mod.rs @@ -107,7 +107,7 @@ impl fmt::Display for dyn MarkupFormatter { } } -/// Parforms the following escapes: +/// Performs the following escapes: /// - `<` → `&lt;` /// - `>` → `&gt;` /// - `"` → `&quot;` diff --git a/crates/seam/src/parse/expander.rs b/crates/seam/src/parse/expander.rs @@ -55,11 +55,12 @@ impl<'a> Error for ExpansionError<'a> { } /// A macro consists of: /// - its name; /// - its argument list (if any); -/// - and its defintion (i.e. *body*). +/// - and its definition (i.e. *body*). 
#[derive(Debug, Clone)] pub struct Macro<'a> { + #[allow(dead_code)] name: String, - params: Box<[String]>, + params: Box<[ParseNode<'a>]>, body: Box<[ParseNode<'a>]> } // TODO: Macro to also store its own scope (at place of definition) @@ -108,7 +109,7 @@ impl<'a> Expander<'a> { self.parser.get_source() } - /// Supply additonal include-directories for the macros + /// Supply additional include-directories for the macros /// to use when searching for files to include/emebed. /// Files are searched for in the order that of the directories. pub fn add_includes<T: Iterator>(&mut self, dirs: T) @@ -180,6 +181,214 @@ impl<'a> Expander<'a> { self.definitions.borrow().get(name).map(|m| m.clone()) } + /// Pattern-matching variable bind for two nodes (the pattern and the value). + fn bind(&self, pattern: &ParseNode<'a>, value: &ParseNode<'a>) + -> Result<(), ExpansionError<'a>> { + match pattern { + // Bind :named argument. + ParseNode::Attribute { keyword: k0, node: node0, .. } => match value { + ParseNode::Attribute { keyword: k1, node: node1, .. } => if k0 == k1 { + self.bind(node0, node1) + } else { + Err(ExpansionError( + format!("Mismatch named argument, looking for :{}, found :{}.", k0, k1), + value.owned_site(), + )) + }, + _ => Err(ExpansionError( + format!("Looking for named argument :{}, got {} instead.", k0, value.node_type()), + value.owned_site(), + )), + }, + // Bind a list containing &-rest syntax and :named arguments. + ParseNode::List { nodes: nodes0, .. } => match value { + ParseNode::List { nodes: nodes1, .. } => self.bind_list(value, nodes0, nodes1), + _ => Err(ExpansionError( + format!("Cannot assign {} to a list.", value.node_type()), + value.owned_site(), + )) + }, + // Symbols are simply assigned as variable names. 
+ ParseNode::Symbol(symbol) => { + self.insert_variable(symbol.value.clone(), Rc::new(Macro { + name: symbol.value.clone(), + params: Box::new([]), + body: Box::new([ value.clone() ]), + })); + Ok(()) + }, + // Other literals must match exactly and no assignment takes place. + ParseNode::Number(number0) => match value { + ParseNode::Number(number1) => if number0 == number1 { Ok(()) } else { + Err(ExpansionError( + format!("Expected the number {} here, got the number {} instead.", + number0.value, number1.value), + number1.site.to_owned(), + )) + }, + _ => Err(ExpansionError( + format!("Expected a number here, got {} instead.", value.node_type()), + value.owned_site(), + )), + }, + ParseNode::String(string0) | ParseNode::Raw(string0) => match value { + ParseNode::String(string1) | ParseNode::Raw(string1) => if string0 == string1 { Ok(()) } else { + Err(ExpansionError( + format!("Expected the string {:?} here, got the string {:?} instead.", + string0.value, string1.value), + string1.site.to_owned(), + )) + }, + _ => Err(ExpansionError( + format!("Expected a string here, got {} instead.", value.node_type()), + value.owned_site(), + )), + } + } + } + + fn bind_list(&self, assigned: &ParseNode<'a>, nodes0: &ParseTree<'a>, nodes1: &ParseTree<'a>) + -> Result<(), ExpansionError<'a>> { + let mut rest_node = None; + let mut rhs_index: usize = 0; + let mut expected: usize = 0; + let mut rhs_named = HashMap::new(); + let mut lhs_named = HashMap::new(); + // We loop this way (not a for loop) so we can control + // when exactly we advance to the next LHS node, potentially + // doing multiple iterations on the same node. + let mut nodes0_iter = nodes0.iter(); + let mut maybe_node0 = nodes0_iter.next(); + while let Some(node0) = maybe_node0 { + // Named arguments (attributes) can appear out of order. + // We'll remember them from later. + if let ParseNode::Attribute { keyword, node, .. 
} = node0 { + lhs_named.insert(keyword, node); + // A named argument in the LHS does not mean we saw one in the RHS, + // so we continue and do not increment rhs_index. + maybe_node0 = nodes0_iter.next(); + continue; + } + let found_rest = node0.symbol().map(|name| name.value.starts_with('&')).unwrap_or(false); + if found_rest { + // If another &-rest node has been found, report an error. + if rest_node.is_some() { + return Err(ExpansionError::new( + "Found multiple nodes matching &-rest syntax.", + node0.site(), + )); + } + // Otherwise, make note of the node it corresponds to. + rest_node = Some(node0); + // Note that we don't increment the `rhs_index`, + // since a &rest node does not match the corresponding item in the RHS. + maybe_node0 = nodes0_iter.next(); + continue; + } + // Assign matched node unless the RHS has too few nodes. + if rhs_index >= nodes1.len() { + return Err(ExpansionError( + format!("Too few values given, looking for value {} out of only {}.", rhs_index + 1, nodes1.len()), + assigned.owned_site(), + )); + } + let node1 = &nodes1[rhs_index]; + if let ParseNode::Attribute { keyword, node, .. } = node1 { + // This is a named argument given in the RSH, so it does not correspond to + // the specific non-named argument in the LHS, so we keep looking until we + // get to it, and remember all the named arguments we find along the way. + rhs_named.insert(keyword.clone(), node); + rhs_index += 1; + // Continue without advancing to the next LHS `node0`. + continue; + } + self.bind(node0, node1)?; + maybe_node0 = nodes0_iter.next(); + expected += 1; + rhs_index += 1; + } + // Assign any remaining arguments in the RHS to &rest. + let mut rest = vec![]; + while rhs_index < nodes1.len() { + let node1 = &nodes1[rhs_index]; + if let ParseNode::Attribute { keyword, node, .. } = node1 { + // There might be remaining named argument further down the RHS list. 
+ rhs_named.insert(keyword.clone(), node); + } else { + rest.push(node1.clone()); + } + rhs_index += 1; + } + // Now, whether the &rest argument was given or not... + if let Some(rest_node) = rest_node { + // Assign the &rest variable to a list containing the extra nodes. + let rest_symbol = rest_node.symbol().unwrap(); + let rest_name = rest_symbol.value[1..].to_owned(); + self.insert_variable( + rest_name.to_owned(), + Rc::new(Macro { + name: rest_name, + params: Box::new([]), + body: rest.into_boxed_slice(), + }), + ); + } else if let Some(last_excess_node) = rest.last() { + // No &rest node mentioned, but excess arguments collected? That's an error. + let got = expected + rest.len(); + return Err(ExpansionError( + format!("Excess number of arguments, expected {}, got {}.", expected, got), + last_excess_node.owned_site(), + )); + } + // Assign all the named arguments. + for (keyword, default) in lhs_named.iter() { + // Remove memory of assigned node from RHS. + let value = match rhs_named.remove(*keyword) { + // Found the named argument in the RHS, so don't use the default. + Some(value) => value, + // No named corresponding argument in the RHS means we have to use its default. + None => default, + }; + // Bind it to a symbol with the same name as the keyword. + self.insert_variable( + (*keyword).to_owned(), + Rc::new(Macro { + name: (*keyword).to_owned(), + params: Box::new([]), + body: Box::new([ *value.clone() ]), + }), + ); + } + // Any remaining RHS named nodes not covered by the LHS, are excess/errors. + if !rhs_named.is_empty() { + // Go through RHS named nodes and list all the excess/invalid names. + let mut excess_keywords: Vec<&str> = vec![]; + let mut rhs = rhs_named.iter(); + let (keyword, some_node) = rhs.next().unwrap(); // Non-empty. 
+ excess_keywords.push(keyword); + for (keyword, _) in rhs { + excess_keywords.push(keyword.as_ref()); + } + let known_keywords: Vec<String> = lhs_named + .iter() + .map(|(kw, _)| format!(":{}", kw)) + .collect(); + let known_keywords = known_keywords.join(", "); + let excess_keywords = excess_keywords.join(", "); + return Err(ExpansionError( + format!(concat!( + "Unknown excess keywords provided, namely: {}.", + "\n", "Expected one of: {}." + ), + excess_keywords, + known_keywords, + ), + some_node.owned_site(), + )); + } + Ok(()) + } + /// Define a macro with `(%define a b)` --- `a` is a symbol or a list `(c ...)` where `c` is a symbol. /// macro definitions will eliminate any preceding whitespace, so make sure trailing whitespace provides /// the whitespace you need. @@ -188,66 +397,50 @@ impl<'a> Expander<'a> { let [head, nodes@..] = &*params else { return Err(ExpansionError( format!("`%define` macro takes at least \ - two (2) arguments ({} were given.", params.len()), + two (2) arguments, while {} were given.", params.len()), node.owned_site())); }; // If head is atomic, we assign to a 'variable'. - // Aditionally, we evaluate its body *eagerly*. - let def_macro = if let Some(variable) = head.atomic() { + // Additionally, we evaluate its body *eagerly*. + let (name, arguments, body): (String, Vec<ParseNode<'a>>, ParseTree) + = if let Some(variable) = head.symbol() { let nodes = nodes.to_owned().into_boxed_slice(); let body = self.expand_nodes(nodes)?; - Rc::new(Macro { - name: variable.value.clone(), - params: Box::new([]), - body, - }) + (variable.value.clone(), vec![], body) } else { // Otherwise, we are assigning to a 'function'. let ParseNode::List { nodes: defn_nodes, .. } = head else { return Err(ExpansionError( "First argument of `%define` macro must be a list \ or variable name/identifier.".to_owned(), - node.site().to_owned())); + head.site().to_owned())); }; let [name, params@..] 
= &**defn_nodes else { return Err(ExpansionError( "`%define` macro definition must at \ least have a name.".to_owned(), - node.site().to_owned())); + head.site().to_owned())); }; - let mut arguments: Vec<String> = Vec::with_capacity(params.len()); - for param_node in params { // Verify arguments are symbols. - if let ParseNode::Symbol(param) = param_node { - arguments.push(param.value.clone()); - } else { - return Err(ExpansionError( - "`define` function arguments must be \ - symbols/identifers.".to_owned(), - node.site().to_owned())); - }; - } + let arguments: Vec<ParseNode<'a>> = params.to_vec(); let ParseNode::Symbol(name_node) = name else { return Err(ExpansionError( "`define` function name must be \ a symbol/identifier.".to_owned(), - node.site().to_owned())); + name.site().to_owned())); }; let name = name_node.value.clone(); - Rc::new(Macro { - name, - params: arguments.into_boxed_slice(), - body: nodes.to_owned().into_boxed_slice(), - }) + let body = nodes.to_owned().into_boxed_slice(); + (name, arguments, body) }; - self.insert_variable(def_macro.name.to_owned(), def_macro); + self.create_macro(name, arguments, body)?; Ok(Box::new([])) } /// `(%ifdef symbol a b)` --- `b` is optional, however, if not provided *and* /// the symbol is not defined, it will erase the whole expression, and whitespace will not - /// be preseved before it. If that's a concern, provide `b` as the empty string `""`. + /// be preserved before it. If that's a concern, provide `b` as the empty string `""`. fn expand_ifdef_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) -> Result<ParseTree<'a>, ExpansionError<'a>> { if params.len() < 2 || params.len() > 3 { @@ -260,10 +453,9 @@ impl<'a> Expander<'a> { let symbol = if let Some(node) = params[0].atomic() { node.value.to_owned() } else { - // FIXME: Borrow-checker won't let me use params[0].site() as site! 
return Err(ExpansionError( "The first argument to `ifdef` must be a symbol/name.".to_string(), - node.site().clone())); + params[0].owned_site())); }; let mut expanded = if self.has_variable(&symbol) { @@ -527,12 +719,12 @@ impl<'a> Expander<'a> { return Err(ExpansionError::new("Expected a namespace name.", node.site())); }; // Parse options to macro. - let mut seperator = "/"; // Default namespace seperator is `/`. + let mut separator = "/"; // Default namespace separator is `/`. while let Some(ParseNode::Attribute { keyword, node, site, .. }) = args.peek() { let _ = args.next(); match keyword.as_str() { "separator" => match node.atomic() { - Some(Node { value, .. }) => seperator = &value, + Some(Node { value, .. }) => separator = &value, None => return Err(ExpansionError( format!("`%namespace' separator must be a symbol, got a {}.", node.node_type()), node.owned_site())), @@ -543,13 +735,13 @@ impl<'a> Expander<'a> { } } // Find all the definitions made within the context of the - // `%namespace` macro and include the defintion prefixed by + // `%namespace` macro and include the definition prefixed by // the namespace in the *current* scope. { let mut self_defs = self.definitions.borrow_mut(); let defs = context.definitions.borrow(); for (key, value) in defs.iter() { - let new_key = format!("{}{}{}", namespace.value, seperator, key); + let new_key = format!("{}{}{}", namespace.value, separator, key); self_defs.insert(new_key, value.clone()); } } @@ -623,45 +815,198 @@ impl<'a> Expander<'a> { ])) } - fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>) + + + fn expand_map_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) -> Result<ParseTree<'a>, ExpansionError<'a>> { - // Eagerly evaluate parameters passed to macro invocation. - let params = self.expand_nodes(params)?; + let params = self.expand_nodes(params)?; // Eager. + let (_, args) = arguments! 
{ [&params] + mandatory(1): symbol, + rest: any, + }?; - let Some(mac) = self.get_variable(name) else { - return Err(ExpansionError::new( - &format!("Macro not found (`{}').", name), &node.owned_site())) + let Some(found) = self.get_variable(&args.number.1.value) else { + return Err(ExpansionError::new("Unknown macro.", &args.number.1.site)); }; - // Instance of expansion subcontext. - let subcontext = self.create_subcontext(); - // Check enough arguments were given. - if params.len() != mac.params.len() { - return Err(ExpansionError( - format!("`%{}` macro expects {} arguments, \ - but {} were given.", &mac.name, mac.params.len(), - params.len()), node.site().to_owned())); - } - // Define arguments for body. - for i in 0..params.len() { - let arg_macro = Macro { - name: mac.params[i].to_owned(), - params: Box::new([]), - body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site. + let callee = ParseNode::Symbol(args.number.1); + let mut expanded = vec![]; + for arg in args.rest { + expanded.extend(self.apply_macro(found.clone(), &callee, Box::new([arg]))?); + } + Ok(expanded.into_boxed_slice()) + } + + /// Filters all null nodes (`()`-nodes) out of the list. + fn expand_filter_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let (_, args) = arguments! { [&params] + mandatory(1): symbol, + rest: any, + }?; + + let Some(found) = self.get_variable(&args.number.1.value) else { + return Err(ExpansionError::new("Unknown macro.", &args.number.1.site)); + }; + + let callee = ParseNode::Symbol(args.number.1); + let mut expanded = vec![]; + for arg in args.rest { + let nodes = self.apply_macro(found.clone(), &callee, Box::new([arg]))?; + match &*nodes { + [node,] if node.null() => {}, + _ => expanded.extend(nodes), }; - subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro)); } - // Expand body. 
- let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec(); - // Inherit leading whitespace of invocation. - if let Some(first_node) = expanded.get_mut(0) { - first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); + Ok(expanded.into_boxed_slice()) + } + + fn expand_splat_macro(&self, _node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let mut expanded = vec![]; + for param in params { + if let ParseNode::List { nodes, leading_whitespace, ..} = param { + let mut nodes = nodes.to_vec(); + if let [first, ..] = nodes.as_mut_slice() { + first.set_leading_whitespace(leading_whitespace); + } + expanded.extend(nodes); + } else { + expanded.push(param.clone()); + } } + Ok(expanded.into_boxed_slice()) + } + + fn expand_list_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let ParseNode::List { site, end_token, leading_whitespace, .. } = node else { + panic!("expand macro call given non-list call node."); + }; + Ok(Box::new([ + ParseNode::List { + nodes: params, + site: site.to_owned(), + end_token: end_token.to_owned(), + leading_whitespace: leading_whitespace.to_owned(), + } + ])) + } + + fn expand_strip_macro(&self, _node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let mut params = self.expand_nodes(params)?; // Eager. + if let Some(first) = params.get_mut(0) { + first.set_leading_whitespace(String::new()); + } + Ok(params) + } + + fn expand_apply_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let (_parser, args) = arguments! 
{ [&params] + mandatory(1): symbol, + rest: any, + }?; + + let Some(found) = self.get_variable(args.number.1.value.as_ref()) else { + return Err(ExpansionError( + format!("No such macro found under the name `{}`.", args.number.1.value), + args.number.1.site.clone(), + )) + }; + + let callee = &ParseNode::Symbol(args.number.1); + self.apply_macro(found, callee, args.rest.into_boxed_slice()) + } + + fn expand_lambda_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let (_parser, args) = arguments! { [&params] + mandatory(1): any, + rest: any, + }?; + + let head: ParseNode<'a> = args.number.1; + let arglist = match head.list() { + Some(list) => list.to_vec(), + None => match head.symbol() { + Some(_) => vec![head.clone()], + None => Err(ExpansionError::new( + "Expected argument(s) as symbol or list of arguments.", + head.site(), + ))? + } + }; + + let name = format!("__lambda{}", node.site().uuid()); + + self.create_macro(name.clone(), arglist, args.rest.into_boxed_slice())?; + + Ok(Box::new([ + ParseNode::Symbol(Node { + value: name, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }) + ])) + } + + fn create_macro(&self, name: String, arglist: Vec<ParseNode<'a>>, body: ParseTree<'a>) + -> Result<Rc<Macro<'a>>, ExpansionError<'a>> { + // Check excess &-macros are not present. + let rest_params: Vec<&ParseNode> = arglist.iter() + .filter(|node| node.symbol().map(|name| name.value.starts_with('&')).unwrap_or(false)) + .collect(); + match rest_params.as_slice() { + [_, excess, ..] => return Err(ExpansionError::new( + "Excess `&`-variadic argument capture variables.", + excess.site() + )), + _ => {} + }; + + // Create and insert macro. 
+ let mac = Rc::new(Macro { + name: name.clone(), + params: arglist.into_boxed_slice(), + body, + }); + self.insert_variable(name, mac.clone()); + Ok(mac) + } + + fn apply_macro(&self, mac: Rc<Macro<'a>>, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Instance of expansion subcontext. + let subcontext = self.create_subcontext(); + // Construct fake list of arguments and parameters and pattern match on them. + subcontext.bind_list(node, &mac.params, &params)?; + // Expand body. + let expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec(); // Finished expanding macro, delete the subcontext. self.remove_subcontext(); + // Return the body of the evaluated macro. Ok(expanded.into_boxed_slice()) } + fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Eagerly evaluate parameters passed to macro invocation. + let params = self.expand_nodes(params)?; + + let Some(mac) = self.get_variable(name) else { + return Err(ExpansionError::new( + &format!("Macro not found (`{}').", name), &node.owned_site())) + }; + + self.apply_macro(mac, node, params) + } + fn expand_invocation(&self, name: &str, //< Name of macro (e.g. %define). node: &ParseNode<'a>, //< Node for `%'-macro invocation. 
@@ -679,6 +1024,13 @@ impl<'a> Expander<'a> { "namespace" => self.expand_namespace_macro(node, params), "date" => self.expand_date_macro(node, params), "join" => self.expand_join_macro(node, params), + "map" => self.expand_map_macro(node, params), + "filter" => self.expand_filter_macro(node, params), + "splat" => self.expand_splat_macro(node, params), + "list" => self.expand_list_macro(node, params), + "strip" => self.expand_strip_macro(node, params), + "apply" => self.expand_apply_macro(node, params), + "lambda" => self.expand_lambda_macro(node, params), "log" => self.expand_log_macro(node, params), "format" => self.expand_format_macro(node, params), "os/env" => self.expand_os_env_macro(node, params), @@ -701,7 +1053,12 @@ impl<'a> Expander<'a> { and cannot be used as a variable.", name), &sym.site)) } - Ok(def.body.clone()) + let mut expanded = def.body.clone(); + // Inherit the whitespace of the call-site. + if let Some(first) = expanded.first_mut() { + first.set_leading_whitespace(sym.leading_whitespace.to_owned()); + } + Ok(expanded) } else { // Not found. Err(ExpansionError( format!("No such macro, `{}`.", name), @@ -716,12 +1073,14 @@ impl<'a> Expander<'a> { // Recurse over every element. let len = nodes.len(); let mut call = nodes.to_vec().into_iter(); - let head = call.next(); + let Some(head) = call.next() else { + return Ok(Box::new([node])); + }; // Pathway: (%_ _ _) macro invocation. - if let Some(ref symbol@ParseNode::Symbol(..)) = head { + if let Some(symbol) = head.symbol() { let node = self.register_invocation(node.clone()); - let name = symbol.atomic().unwrap().value.clone(); + let name = symbol.value.clone(); if name.starts_with("%") { // Rebuild node... let name = &name[1..]; @@ -737,7 +1096,7 @@ impl<'a> Expander<'a> { } // Otherwise, if not a macro, just expand child nodes incase they are macros. 
let mut expanded_list = Vec::with_capacity(len); - expanded_list.extend(self.expand_node(head.unwrap().clone())?); + expanded_list.extend(self.expand_node(head.clone())?); for elem in call { expanded_list.extend(self.expand_node(elem)?); } diff --git a/crates/seam/src/parse/lexer.rs b/crates/seam/src/parse/lexer.rs @@ -230,7 +230,7 @@ impl<'a> Lexer { let Some(next_char) = self.peek_char() else { let site = Site::new(&self.source_path, &self.source, line_no, start, line_offset, 3); return Err(LexError( - String::from("Unclosed tripple-quoted string."), + String::from("Unclosed triple-quoted string."), site)); }; if next_char == '\n' { self.next_line(); } diff --git a/crates/seam/src/parse/macros.rs b/crates/seam/src/parse/macros.rs @@ -6,30 +6,47 @@ use regex::Regex; use super::{ expander::ExpansionError, - parser::{Node, ParseNode, ParseTree}, tokens::Site, + parser::{Node, ParseNode, ParseTree}, }; #[derive(Debug, Clone)] -pub enum ArgPredicate { +pub enum ArgPredicate<'tree> { Exactly(String), Matching(Regex), - Satisfying(fn(ParseNode) -> bool), + Satisfying(fn(&ParseNode<'tree>) -> Result<(), ExpansionError<'tree>>), } -impl ArgPredicate { - pub fn check_node<'tree>(&self, node: &Node<'tree>) -> Result<(), ExpansionError<'tree>> { +impl<'tree> ArgPredicate<'tree> { + pub fn check_node(&self, node: &Node<'tree>, parse_node: &ParseNode<'tree>) -> Result<(), ExpansionError<'tree>> { match self { Self::Exactly(value) => if node.value == *value { Ok(()) } else { Err(ExpansionError( - format!("value must be equal to `{}`.", value), + format!("value must be equal to `{}`", value), node.site.to_owned(), )) - } - _ => Ok(()) + }, + Self::Matching(expr) => if expr.is_match(&node.value) { Ok(()) } else { + Err(ExpansionError( + format!("value must be match /{}/", expr.as_str()), + node.site.to_owned(), + )) + }, + Self::Satisfying(test) => test(parse_node), } } - pub fn check<'tree>(&self, node: &ParseNode<'tree>) -> Result<(), ExpansionError<'tree>> { - Ok(()) + pub fn 
check(&self, node: &ParseNode<'tree>) -> Result<(), ExpansionError<'tree>> { + match self { + Self::Exactly(..) | Self::Matching(..) => if let Some(atomic) = node.atomic() { + self.check_node(atomic, node) + } else { + // If used correctly, this should really be an unreachable!(). + Err(ExpansionError( + format!("Expected a literal, got a {} instead.", node.node_type()), + node.owned_site(), + )) + }, + Self::Satisfying(test) => test(node), + } } } @@ -42,21 +59,21 @@ impl ArgPredicate { /// Symbolic ⊆ Literal; /// * ⊆ Any. #[derive(Debug, Clone)] -pub enum ArgType { - Literal(Vec<ArgPredicate>), - String(Vec<ArgPredicate>), - Symbol(Vec<ArgPredicate>), - Number(Vec<ArgPredicate>), - Symbolic(Vec<ArgPredicate>), - List(Vec<ArgType>), - Any(Vec<ArgPredicate>), +pub enum ArgType<'tree> { + Literal(Vec<ArgPredicate<'tree>>), + String(Vec<ArgPredicate<'tree>>), + Symbol(Vec<ArgPredicate<'tree>>), + Number(Vec<ArgPredicate<'tree>>), + Symbolic(Vec<ArgPredicate<'tree>>), + List(Vec<ArgType<'tree>>), + Any(Vec<ArgPredicate<'tree>>), } -fn check_all_node<'tree>(preds: &Vec<ArgPredicate>, node: &Node<'tree>) -> Result<(), ExpansionError<'tree>> { +fn check_all<'tree>(preds: &Vec<ArgPredicate<'tree>>, node: &ParseNode<'tree>) -> Result<(), ExpansionError<'tree>> { if preds.is_empty() { return Ok(()); } let mut issues = vec![]; for pred in preds { - match pred.check_node(node) { + match pred.check(node) { Ok(()) => return Ok(()), Err(err) => issues.push(err), } @@ -64,35 +81,18 @@ fn check_all_node<'tree>(preds: &Vec<ArgPredicate>, node: &Node<'tree>) -> Resul if issues.is_empty() { return Ok(()); } // Amalgamate errors. 
let mut error = String::new(); - let _ = writeln!(error, "This argument's value did not satisfy one of the follwining:"); + let _ = writeln!(error, "This argument's value did not satisfy one of the following:"); for (i, issue) in issues.iter().enumerate() { let _ = write!(error, " * {}", issue.0); if i != issues.len() - 1 { - let _ = write!(error, "\n"); + let _ = write!(error, "; or\n"); } } - Err(ExpansionError(error, node.site.clone())) -} - -fn check_all<'tree>(preds: &Vec<ArgPredicate>, node: &ParseNode<'tree>) -> Result<(), ExpansionError<'tree>> { - if preds.is_empty() { return Ok(()); } - let mut issues = vec![]; - for pred in preds { - match pred.check(node) { - Ok(()) => return Ok(()), - Err(err) => issues.push(err), - } - } - if issues.is_empty() { return Ok(()); } - // Amalgamate errors. - let mut error = String::from("This argument's value did not satisfy one of the follwining:\n"); - for issue in issues { - error += &format!(" * {}", issue.0); - } + let _ = write!(error, "."); Err(ExpansionError(error, node.owned_site())) } -impl ArgType { +impl<'tree> ArgType<'tree> { pub fn name(&self) -> &'static str { use ArgType::*; match self { @@ -106,23 +106,23 @@ impl ArgType { } } - pub fn check<'tree>(&self, node: &ParseNode<'tree>) -> Result<(), ExpansionError<'tree>> { + pub fn check(&self, node: &ParseNode<'tree>) -> Result<(), ExpansionError<'tree>> { use ArgType::*; // Compute the generic type-mismatch error beforehand, even if not used. let mismatch = ExpansionError( format!("Expected a {} node, got a {} instead.", self.name(), node.node_type()), node.owned_site()); match node { - ParseNode::Symbol(v) => match self { - Literal(pred) | Symbol(pred) | Symbolic(pred) | Any(pred) => check_all_node(pred, v), + ParseNode::Symbol(..) 
=> match self { + Literal(pred) | Symbol(pred) | Symbolic(pred) | Any(pred) => check_all(pred, node), _ => Err(mismatch), }, - ParseNode::String(v) | ParseNode::Raw(v) => match self { - Literal(pred) | String(pred) | Any(pred) => check_all_node(pred, v), + ParseNode::String(..) | ParseNode::Raw(..) => match self { + Literal(pred) | String(pred) | Any(pred) => check_all(pred, node), _ => Err(mismatch), }, - ParseNode::Number(v) => match self { - Literal(pred) | Symbolic(pred) | Number(pred) | Any(pred) => check_all_node(pred, v), + ParseNode::Number(..) => match self { + Literal(pred) | Symbolic(pred) | Number(pred) | Any(pred) => check_all(pred, node), _ => Err(mismatch), }, ParseNode::List { nodes, .. } => match self { @@ -150,23 +150,23 @@ impl ArgType { } } -/// Kind of arguemnt (optional, mandatory). +/// Kind of argument (optional, mandatory). #[derive(Debug, Clone)] -pub enum Arg { - Mandatory(ArgType), - Optional(ArgType), +pub enum Arg<'tree> { + Mandatory(ArgType<'tree>), + Optional(ArgType<'tree>), } -impl Arg { - pub fn argtype(&self) -> &ArgType { +impl<'tree> Arg<'tree> { + pub fn argtype(&self) -> &ArgType<'tree> { match self { Arg::Mandatory(typ) | Arg::Optional(typ) => typ } } } -/// Positonal or named argument position. -#[derive(Debug, Clone)] +/// Positional or named argument position. +#[derive(Debug, Clone, Copy)] pub enum ArgPos<'a> { Int(usize), Str(&'a str) } /// What kind of types can be matched against /// when determining an arguments positionality. 
@@ -176,8 +176,11 @@ pub trait ArgMatcher { impl ArgMatcher for usize { fn unwrap(&self) -> ArgPos { ArgPos::Int(*self) } } -impl ArgMatcher for &str { - fn unwrap(&self) -> ArgPos { ArgPos::Str(self) } +impl<'a> ArgMatcher for &'a str { + fn unwrap(&self) -> ArgPos<'a> { ArgPos::Str(self) } +} +impl<'a> ArgMatcher for ArgPos<'a> { + fn unwrap<'b>(&'b self) -> ArgPos<'b> { *self } } impl<'a> From<&'a Box<dyn ArgMatcher + 'a>> for Option<usize> { fn from(value: &'a Box<dyn ArgMatcher + 'a>) -> Option<usize> { @@ -195,6 +198,11 @@ impl<'a> From<&'a Box<dyn ArgMatcher + 'a>> for Option<&'a str> { } } } +impl<'a> From<&'a Box<dyn ArgMatcher + 'a>> for ArgPos<'a> { + fn from(value: &'a Box<dyn ArgMatcher + 'a>) -> ArgPos<'a> { + value.unwrap() + } +} impl<'a> From<usize> for Box<dyn ArgMatcher + 'a> { fn from(value: usize) -> Box<dyn ArgMatcher + 'a> { Box::new(value) } } @@ -204,32 +212,40 @@ impl<'a> From<&'a str> for Box<dyn ArgMatcher + 'a> { impl<'a> From<&'a String> for Box<dyn ArgMatcher + 'a> { fn from(value: &'a String) -> Box<dyn ArgMatcher + 'a> { Box::new(value.as_ref()) } } +impl<'a> From<ArgPos<'a>> for Box<dyn ArgMatcher + 'a> { + fn from(value: ArgPos<'a>) -> Box<dyn ArgMatcher + 'a> { + match value { + ArgPos::Int(i) => Box::new(i), + ArgPos::Str(k) => Box::new(k), + } + } +} /// Holds information as to what kind rules -/// must be satsified for an argument's given +/// must be satisfied for an argument's given /// position. /// Pattern pertains to how to argument sits /// in the macro-call's argument list. #[derive(Debug, Clone)] -struct ArgPattern<'a> { - argument: Arg, +struct ArgPattern<'a, 'tree> { + argument: Arg<'tree>, pattern: fn(&Box<dyn ArgMatcher + 'a>) -> bool, } /// A complete description of how a macro's arguments /// should be parsed. 
#[derive(Debug, Clone)] -pub struct ArgRules<'a> { - patterns: Vec<ArgPattern<'a>>, - trailing: Option<ArgType>, +pub struct ArgRules<'a, 'tree> { + patterns: Vec<ArgPattern<'a, 'tree>>, + trailing: Option<ArgType<'tree>>, } -impl<'a> ArgRules<'a> { +impl<'a, 'tree> ArgRules<'a, 'tree> { pub fn new() -> Self { Self { patterns: Vec::new(), trailing: None } } /// Register a pattern to match. - pub fn register(&mut self, matcher: fn(&Box<dyn ArgMatcher + 'a>) -> bool, arg: Arg) + pub fn register(&mut self, matcher: fn(&Box<dyn ArgMatcher + 'a>) -> bool, arg: Arg<'tree>) { self.patterns.push(ArgPattern { argument: arg, @@ -237,12 +253,12 @@ impl<'a> ArgRules<'a> { }); } /// Register matching on all remaining arguments. - pub fn register_remaining(&mut self, arg_type: ArgType) { + pub fn register_remaining(&mut self, arg_type: ArgType<'tree>) { self.trailing = Some(arg_type); } /// Turn this structure into a parser. - pub fn parser<'params, 'tree>(self, params: &'params Box<[ParseNode<'tree>]>) -> ArgParser<'params, 'a, 'tree> { - ArgParser::new(self, params).unwrap() + pub fn parser<'params>(self, call_node: &'params ParseNode<'tree>, params: &'params Box<[ParseNode<'tree>]>) -> Result<ArgParser<'params, 'a, 'tree>, ExpansionError<'tree>> { + ArgParser::new(self, call_node, params) } /// Count how many mandatory arguments there are. 
pub fn count_mandatory(&self) -> usize { @@ -259,14 +275,16 @@ impl<'a> ArgRules<'a> { #[derive(Debug, Clone)] pub struct ArgParser<'params: 'rules, 'rules, 'tree> { - pub rules: ArgRules<'rules>, + pub call_node: &'params ParseNode<'tree>, + pub rules: ArgRules<'rules, 'tree>, pub positional: HashMap<usize, &'params ParseNode<'tree>>, pub named: HashMap<String, &'params ParseNode<'tree>>, pub trailing: Vec<&'params ParseNode<'tree>> } impl<'params, 'rules, 'tree> ArgParser<'params, 'rules, 'tree> { - pub fn new(rules: ArgRules<'rules>, + pub fn new(rules: ArgRules<'rules, 'tree>, + call_node: &'params ParseNode<'tree>, params: &'params ParseTree<'tree>) -> Result<Self, ExpansionError<'tree>> { let mut position = 0; @@ -323,9 +341,9 @@ impl<'params, 'rules, 'tree> ArgParser<'params, 'rules, 'tree> { trailing.push(param); }, None => { - // Error on fact that an errenous positional or named argument - // has been given. Only error on additional errenous named - // arguemnts if trailing argument capture is enabled. + // Error on fact that an erroneous positional or named argument + // has been given. Only error on additional erroneous named + // arguments if trailing argument capture is enabled. return Err(ExpansionError(if let ParseNode::Attribute { keyword, .. } = param { format!("Unexpected named argument `:{}`.", keyword) } else { @@ -338,22 +356,21 @@ impl<'params, 'rules, 'tree> ArgParser<'params, 'rules, 'tree> { // After checking all the arguments are *valid*, now check // that all mandatory arguments are given. 
let needed_count = rules.count_mandatory(); - // TODO: pass in site of macro-call - let last_site = params.last().map(|node| node.owned_site()).unwrap_or(Site::unknown()); + let last_site = params.last().map(|node| node.owned_site()).unwrap_or(call_node.owned_site()); if mandatory_count < needed_count { return Err(ExpansionError( - format!("Missing {} non-optional arguments from macro call.", needed_count - mandatory_count), + format!("Missing {} non-optional argument(s) from macro call.", needed_count - mandatory_count), last_site )); } - Ok(Self { rules, positional, named, trailing }) + Ok(Self { call_node, rules, positional, named, trailing }) } pub fn get_optional<P>(&self, key: P) -> Option<&&ParseNode<'tree>> where P: Into<Box<dyn ArgMatcher + 'rules>> { - let matcher: &Box<dyn ArgMatcher + 'rules> = &key.into(); + let matcher: Box<dyn ArgMatcher + 'rules> = key.into(); match matcher.unwrap() { ArgPos::Int(i) => self.positional.get(&i), ArgPos::Str(k) => self.named.get(k), @@ -363,7 +380,23 @@ impl<'params, 'rules, 'tree> ArgParser<'params, 'rules, 'tree> { pub fn get<P>(&self, key: P) -> Result<&&ParseNode<'tree>, ExpansionError<'tree>> where P: Into<Box<dyn ArgMatcher + 'rules>> { - Ok(self.get_optional(key).unwrap()) + let matcher: Box<dyn ArgMatcher + 'rules> = key.into(); + match matcher.unwrap() { + ArgPos::Int(i) => match self.positional.get(&i) { + Some(value) => Ok(value), + None => Err(ExpansionError( + format!("Argument in position {} not given.", i), + self.call_node.owned_site(), + )), + }, + ArgPos::Str(k) => match self.named.get(k) { + Some(value) => Ok(value), + None => Err(ExpansionError( + format!("Named argument :{} not given.", k), + self.call_node.owned_site(), + )), + }, + } } } @@ -435,3 +468,74 @@ pub fn extract_list<'a>(node: ParseNode<'a>) -> Result<Vec<ParseNode<'a>>, Expan pub fn extract_any<'a>(node: ParseNode<'a>) -> Result<ParseNode<'a>, ExpansionError<'a>> { Ok(node) } + +// Same as above but does not move the node. 
+ +pub fn extract_literal_ref<'a, 'b>(node: &'b ParseNode<'a>) -> Result<&'b Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Symbol(lit) + | ParseNode::Number(lit) + | ParseNode::String(lit) + | ParseNode::Raw(lit) => Ok(lit), + _ => Err(ExpansionError( + format!("Expected a literal, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_string_ref<'a, 'b>(node: &'b ParseNode<'a>) -> Result<&'b Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::String(string) + | ParseNode::Raw(string) => Ok(string), + _ => Err(ExpansionError( + format!("Expected a string, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_symbol_ref<'a, 'b>(node: &'b ParseNode<'a>) -> Result<&'b Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Symbol(sym) => Ok(sym), + _ => Err(ExpansionError( + format!("Expected a symbol, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_number_ref<'a, 'b>(node: &'b ParseNode<'a>) -> Result<&'b Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Number(lit) => Ok(lit), + _ => Err(ExpansionError( + format!("Expected a number, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_symbolic_ref<'a, 'b>(node: &'b ParseNode<'a>) -> Result<&'b Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Symbol(sym) + | ParseNode::Number(sym) => Ok(sym), + _ => Err(ExpansionError( + format!("Expected a symbolic literal, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_list_ref<'a, 'b>(node: &'b ParseNode<'a>) -> Result<Vec<&'b ParseNode<'a>>, ExpansionError<'a>> { + match node { + ParseNode::List { nodes, .. 
} => Ok(nodes.iter().collect()), + _ => Err(ExpansionError( + format!("Expected a list, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_any_ref<'a, 'b>(node: &'b ParseNode<'a>) -> Result<&'b ParseNode<'a>, ExpansionError<'a>> { + Ok(node) +} diff --git a/crates/seam/src/parse/parser.rs b/crates/seam/src/parse/parser.rs @@ -14,6 +14,19 @@ pub struct Node<'a> { pub leading_whitespace: String, } +impl<'a> PartialEq for Node<'a> { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} +impl<'a> Eq for Node<'a> { } + +impl<'a> std::hash::Hash for Node<'a> { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.value.hash(state) + } +} + impl<'a> Node<'a> { pub fn new(value: &str, site: &Site<'a>, leading_whitespace: &str) -> Self { Self { @@ -47,12 +60,84 @@ pub enum ParseNode<'a> { }, } +impl<'a> PartialEq for ParseNode<'a> { + fn eq(&self, other: &Self) -> bool { + match self { + Self::Symbol(node0) => match other { + Self::Symbol(node1) => node0 == node1, + _ => false, + }, + Self::Number(node0) => match other { + Self::Number(node1) => node0 == node1, + _ => false, + }, + Self::String(node0) => match other { + Self::String(node1) => node0 == node1, + _ => false, + }, + Self::Raw(node0) => match other { + Self::Raw(node1) => node0 == node1, + _ => false, + }, + Self::List { nodes: nodes0, .. } => match other { + Self::List { nodes: nodes1, .. } => nodes0 == nodes1, + _ => false, + }, + Self::Attribute { keyword: keyword0, node: node0, .. } => match other { + Self::Attribute { keyword: keyword1, node: node1, .. 
} => + keyword0 == keyword1 && node0 == node1, + _ => false, + } + } + } +} +impl<'a> Eq for ParseNode<'a> { } + +impl<'a> std::hash::Hash for ParseNode<'a> { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + match self { + Self::Symbol(node) => { + state.write_u8(0); + node.hash(state); + }, + Self::Number(node) => { + state.write_u8(1); + node.hash(state); + }, + Self::String(node) =>{ + state.write_u8(2); + node.hash(state); + }, + Self::Raw(node) => { + state.write_u8(3); + node.hash(state); + }, + Self::List { nodes, .. } => { + state.write_u8(4); + nodes.hash(state); + }, + Self::Attribute { keyword, node, .. } => { + state.write_u8(5); + keyword.hash(state); + node.hash(state); + }, + } + } +} + impl<'a> ParseNode<'a> { + /// Returns true if and only if self is the empty list `()`. + pub fn null(&self) -> bool { + match self { + Self::List { nodes, .. } => nodes.is_empty(), + _ => false, + } + } + /// Unwrap a literal node if it is a symbol or number. pub fn symbolic(&self) -> Option<&Node<'a>> { match self { - Self::Symbol(ref node) - | Self::Number(ref node) => Some(node), + Self::Symbol(ref node) | Self::Number(ref node) => Some(node), _ => None, } } @@ -65,6 +150,22 @@ impl<'a> ParseNode<'a> { } } + /// Unwrap a number node into a [`Node<'a>`]. + pub fn number(&self) -> Option<&Node<'a>> { + match self { + Self::Number(ref node) => Some(node), + _ => None, + } + } + + /// Unwrap a symbol node into a [`Node<'a>`]. + pub fn symbol(&self) -> Option<&Node<'a>> { + match self { + Self::Symbol(ref node) => Some(node), + _ => None, + } + } + /// Unwrap literal (atomic) nodes into their underlying [`Node`]. pub fn atomic(&self) -> Option<&Node<'a>> { match self { @@ -76,6 +177,22 @@ impl<'a> ParseNode<'a> { } } + /// Unwrap list node into vector of nodes. + pub fn list(&self) -> Option<&ParseTree<'a>> { + match self { + Self::List { nodes, .. } => Some(nodes), + _ => None, + } + } + + /// Unwrap attribute node into keyword and value. 
+ pub fn attribute(&self) -> Option<(&str, &Box<ParseNode<'a>>)> { + match self { + Self::Attribute { keyword, node, .. } => Some((keyword, node)), + _ => None, + } + } + /// Same as [`Self::atomic`], but consumes the node, /// returning an owned [`Node`]. pub fn into_atomic(self) -> Option<Node<'a>> { @@ -149,21 +266,13 @@ impl<'a> ParseNode<'a> { } } + pub fn is_symbolic(&self) -> bool { self.symbolic().is_some() } pub fn is_atomic(&self) -> bool { self.atomic().is_some() } - - pub fn is_list(&self) -> bool { - match self { - Self::List { .. } => true, - _ => false, - } - } - - pub fn is_attribute(&self) -> bool { - match self { - Self::Attribute { .. } => true, - _ => false, - } - } + pub fn is_symbol(&self) -> bool { self.symbol().is_some() } + pub fn is_number(&self) -> bool { self.number().is_some() } + pub fn is_string(&self) -> bool { self.string().is_some() } + pub fn is_list(&self) -> bool { self.list().is_some() } + pub fn is_attribute(&self) -> bool { self.attribute().is_some() } } // Try to convert a [`ParseNode`] enum value into @@ -408,7 +517,7 @@ impl<'a> Parser { } } -/// Santize any escaped characters by removing their leading backslash. +/// Sanitize any escaped characters by removing their leading backslash. 
fn escape_sanitize(string: &str) -> String { let mut builder = String::with_capacity(string.len()); let mut chars = string.chars(); @@ -424,10 +533,10 @@ fn escape_sanitize(string: &str) -> String { fn escape_string<'a>(string: &'a str, site: &Site<'a>) -> Result<String, LexError<'a>> { string.to_unescaped() .map(|s| s.to_string()) - .map_err(|index| { + .map_err(|invalid| { LexError( format!("Invalid escape `\\{}' at byte-index {}.", - string.chars().nth(index).unwrap_or('?'), index), + string.chars().nth(invalid.index).unwrap_or('?'), invalid.index), site.clone()) }) } diff --git a/crates/seam/src/parse/tokens.rs b/crates/seam/src/parse/tokens.rs @@ -1,4 +1,4 @@ -use std::fmt::{self, Display}; +use std::{fmt::{self, Display}, hash::{DefaultHasher, Hash, Hasher}}; use unicode_width::UnicodeWidthStr; /// Precise source-code location a parsed (or lexed) node (or token). @@ -42,6 +42,13 @@ impl<'a> Site<'a> { } } + pub fn uuid(&self) -> String { + let mut hasher = DefaultHasher::new(); + self.source.hash(&mut hasher); + let hash = hasher.finish(); + format!("{}-{}", hash, self.bytes_from_start) + } + pub const fn unknown() -> Self { UNKNOWN_SITE } /// Byte-offset in source code for start-of-line where this site is. diff --git a/crates/seam_argparse_proc_macro/src/lib.rs b/crates/seam_argparse_proc_macro/src/lib.rs @@ -1,6 +1,7 @@ //! Procedural macro for the `arguments! { ... }` //! macro-argument parser for seam macros. //! TODO: Convert all `panic!(..)` calls to actual compiler errors. +#![feature(proc_macro_span)] #![feature(proc_macro_diagnostic)] use std::{collections::{HashMap, HashSet}, iter::Peekable}; @@ -85,6 +86,8 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { let stream = stream.into_iter().peekable(); let mut stream = stream.into_iter(); + let struct_name: TokenStream = format!("MyArgs_{}", Span::call_site().line()).parse().unwrap(); + // Parse the provided runtime argument vector. 
let Some(args_vec) = stream.next().and_then(|tokens| match tokens { TokenTree::Group(group) => Some(group @@ -147,12 +150,10 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { } let argument_type = parse_argument_type(&mut stream, PositionTypes::Rest); let arg_type = argument_type.source_code; - let code = quote! { - { - let arg_type = #arg_type; - rules.register_remaining(arg_type); - }; - }; + let code = quote! {{ + let arg_type = #arg_type; + rules.register_remaining(arg_type); + }}; out.extend(code); // Register argument struct type. let rust_type = argument_type.properties.rust_type; @@ -166,7 +167,7 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { t => { let span: proc_macro::Span = t.span().unwrap(); Diagnostic::spanned(span, proc_macro::Level::Error, "expected a paranthesised pattern matching the argument position here.").emit(); - panic!("failed to parse."); + panic!("expected a position pattern."); }, }; // Skip `:` @@ -183,19 +184,17 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { PositionTypes::Optional => quote! { crate::parse::macros::Arg::Optional }, _ => unreachable!(), }; - let code = quote! { - { - let arg_type = #arg_type; - let arg = #arg_pos(arg_type); - fn position_matcher<'b>(pattern: &Box<dyn crate::parse::macros::ArgMatcher + 'b>) -> bool { - match pattern.into() { - Some(#position_pattern) => true, - _ => false, - } + let code = quote! {{ + let arg_type = #arg_type; + let arg = #arg_pos(arg_type); + fn position_matcher<'b>(pattern: &Box<dyn crate::parse::macros::ArgMatcher + 'b>) -> bool { + match pattern.into() { + Some(#position_pattern) => true, + _ => false, } - rules.register(position_matcher, arg); - }; - }; + } + rules.register(position_matcher, arg); + }}; out.extend(code); // Register argument struct type. 
let rust_type = argument_type.properties.rust_type; @@ -230,13 +229,13 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { match match token { Some(TokenTree::Punct(punct)) if punct.as_char() == ',' => Ok(()), Some(t) => Err(t.span().unwrap()), - None => Err(Span::call_site()), + None => Ok(()), } { Ok(()) => {}, Err(span) => { Diagnostic::spanned(span, proc_macro::Level::Error, "Expected a comma after defining an argument rule.").emit(); - panic!("failed to parse"); + panic!("expected a comma"); } }; // Otherwise, switch back to trying tp parse a new rule. @@ -337,24 +336,25 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { // Assemble code that builds argument parser context and argument struct. let out = out.into_iter(); - let out = quote! { + quote! { { #(#out)*; - #[derive(Debug, Clone)] - struct MyArguments<'a> { + #[allow(non_camel_case_types)] + #[derive(Clone, Debug)] + struct #struct_name<'a> { number: (#(#tuple_types),*,), - #(#named_arguments: #named_types),*, + #(#named_arguments: #named_types,)* rest: Vec<#rest_rust_type>, } - let parser_result = crate::parse::macros::ArgParser::new(rules, #params); + let parser_result = crate::parse::macros::ArgParser::new(rules, &node, #params); match parser_result { Ok(parser) => { #(#tuple_variable_initializations)* #(let #named_arguments: #named_types = #named_values;)* let rest = #trailing_arguments; - let args_struct = MyArguments { + let args_struct = #struct_name { number: (#(#tuple_variables),*,), - #(#named_arguments),*, + #(#named_arguments,)* rest, }; Ok((parser, args_struct)) // Returns the parser and args from the scope. 
@@ -362,9 +362,7 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { Err(e) => Err(e), } } - }.into(); - println!("{}", out); - out + }.into() } #[derive(Clone, PartialEq, Eq, Hash)] @@ -487,22 +485,25 @@ fn parse_argument_type(stream: &mut Peekable<IntoIter>, position_type: PositionT }, _ => panic!("Unexpected list delimiter"), }, - // Parse a function which matches the arguemnt. + // Parse a function which matches the argument. Some(TokenTree::Ident(ident)) if ident.to_string() == "fn" => { stream.next(); // Consume the `fn` keyword. + // Consume the function argument list. let fn_arguments = match stream.next() { Some(TokenTree::Group(group)) => group.stream().into_iter(), None => panic!("Unexpected EOF"), _ => panic!("Unexpected token"), }; + // Consume the function body. let Some(fn_body) = stream.next() else { panic!("Unexpected EOF") }; quote! {{ - fn predicate(#(#fn_arguments),*) -> bool { #fn_body } - let arg_pred = crate::parse::macros::ArgPredicate::Satisfying(Box::new(predicate)); + fn predicate<'tree>(#(#fn_arguments),*) -> Result<(), ExpansionError<'tree>> { #fn_body } + let arg_pred = crate::parse::macros::ArgPredicate::Satisfying(predicate); #arg_type(vec![arg_pred]) }} } _ => quote! { #arg_type(vec![]) }, + //_ => panic!("Unexpected tokens after argument type rules.") }; ArgumentType {