seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam

commit 2748bee7863eec92d6fad7df5462766a3cde3560
parent 3af2f6a13442873e1351aea20f4d66ae57baaad2
Author: Demonstrandum <samuel@knutsen.co>
Date:   Sat,  7 Dec 2024 19:35:14 +0000

Added %markdown and %yaml macros.
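
In use, the two new macros look roughly like the sketch below, pieced together from the `arguments!` schemas and the `expand_yaml` doc-comment further down this diff; the multi-line string literal, the `#=>` annotations, and feeding `%embed`'s result to `%markdown` are illustrative assumptions rather than output copied from seam.

```text
(%yaml "a: 2
nested:
  hello: world")
%a             #=> 2
%nested/hello  #=> world   (definitions are qualified by :separator, default "/")

(%markdown (%embed "post.md"))
  #=> frontmatter keys become %define-definitions, and the markdown
      body expands to a single %raw HTML string
```

Passing `:only frontmatter` or `:only content` restricts `%markdown` to one half of that job. `%yaml` and `%json` share the same expander (YAML being a superset of JSON), while `expand_toml` is still a stub at this point in the diff.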

Diffstat:
 M Cargo.lock                                  | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 M README.md                                   |  19 +++++++++++++++----
 M crates/seam/Cargo.toml                      |   7 +++++--
 M crates/seam/src/parse/expander.rs           | 505 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 M crates/seam/src/parse/macros.rs             |  11 ++++++++---
 M crates/seam/src/parse/parser.rs             |  32 ++++++++++----------------------
 M crates/seam/src/parse/tokens.rs             |  12 +++++++++++-
 M crates/seam_argparse_proc_macro/Cargo.toml  |   2 +-
 M crates/seam_argparse_proc_macro/src/lib.rs  |  97 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
9 files changed, 712 insertions(+), 83 deletions(-)
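
Besides the headline macros, the `expander.rs` diff below also lands `%for`, `%glob` and `%concat`. Rough shapes for them, read off their `arguments!` schemas (`%for` wants a variable, the symbol `in`, and a list; `%glob` takes a pattern plus an optional `:type` of `file`/`directory`/`any`; `%concat` takes trailing literals); how the loop variable is referenced in the body (`%x`) and the `#=>` results are my assumptions, not captured output.

```text
(%for x in (1 2 3)
  (li %x))                        #=> (li 1) (li 2) (li 3)

(%glob "posts/*.md" :type file)   #=> "posts/a.md" "posts/b.md" ...
                                      (one string per match, spliced in place)

(%concat "fizz" "buzz")           #=> "fizzbuzz"   (shorthand for (%join "" ...))
```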

diff --git a/Cargo.lock b/Cargo.lock @@ -3,6 +3,18 @@ version = 4 [[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -27,6 +39,12 @@ dependencies = [ ] [[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + +[[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -90,12 +108,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c1113b908df80c963b107424498e37fba986b424b605729d1492dfbe4b2a630" [[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] name = "formatx" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db0f0c49aba98a3b2578315766960bd242885ff672fd62610c5557cd6c6efe03" [[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown", +] + +[[package]] name = "iana-time-zone" version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -146,6 +197,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] +name = "markdown" +version = "1.0.0-alpha.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6491e6c702bf7e3b24e769d800746d5f2c06a6c6a2db7992612e0f429029e81" +dependencies = [ + "unicode-id", +] + +[[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -215,20 +275,23 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "seam" -version = "0.3.2" +version = "0.4.0" dependencies = [ "chrono", "colored", "descape", "formatx", + "glob", + "markdown", "regex", "seam_argparse_proc_macro", "unicode-width", + "yaml-rust2", ] [[package]] name = "seam_argparse_proc_macro" -version = "0.1.0" +version = "0.1.1" dependencies = [ "proc-macro2", "quote", @@ -253,6 +316,12 @@ dependencies = [ ] [[package]] +name = "unicode-id" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10103c57044730945224467c09f71a4db0071c123a0648cc3e818913bde6b561" + +[[package]] name = "unicode-ident" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -265,6 +334,12 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] name = "wasm-bindgen" version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -457,3 +532,34 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "yaml-rust2" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a1a1c0bc9823338a3bdf8c61f994f23ac004c6fa32c08cd152984499b445e8d" +dependencies = [ + "arraydeque", + "encoding_rs", + "hashlink", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/README.md b/README.md @@ -93,7 +93,15 @@ seam --sexp <<< '(hello (%define subject world) %subject)' ## Checklist - [ ] User `(%error msg)` macro for aborting compilation. - [ ] Pattern-matching `(%match expr (pat1 ...) (pat2 ...) default)` macro. - Pattern matching is already implemented for `%define` internally. + Pattern matching is already implemented for `%define` internally. + - [ ] The trailing keyword-matching operator. `&&rest` matches excess keyword. + Extracting a value from a map `(:a 1 :b 2 :c 3)` is done with: + `(%match h (:b () &&_) %b)`. + - [ ] `%get` macro: `(%get b (:a 1 :b 2))` becomes `2`; `(%get 0 (a b c))` becomes `a`. + - [ ] `(%yaml "...")`, `(%toml "...")` and `(%json "...")` converts + whichever config-lang definition into a seam `%define`-definition. + - [ ] `(%do ...)` which just expands to the `...`; the identity function. + - [ ] Catch expansion errors: `(%try :catch index-error (%do code-to-try) :error the-error (%do caught-error %the-error))`. - [x] Implement `(%strip ...)` which evaluates to the `...` without any of the leading whitespace. - [x] Implement *splat* operation: `(%splat (a b c))` becomes `a b c`. - [x] `(%define x %body)` evaluates `%body` eagerly (at definition), @@ -109,17 +117,20 @@ seam --sexp <<< '(hello (%define subject world) %subject)' - [x] `(%lambda (x y) ...)` macro which just evaluates to an secret symbol, e.g. `__lambda0`. used by applying `%apply`, e.g. `(%apply (%lambda (a b) b a) x y)` becomes `y x` - [x] `(%string ...)`, `(%join ...)`, `(%map ...)`, `(%filter ...)` macros. + - [x] `(%concat ...)` which is just `(%join "" ...)`. + - [ ] `(%basename )`, `(%dirname)` and `(%extension)` macro for paths. + - [ ] Add options to `%glob` for sorting by type, date(s), name, etc. - [x] `(%format "{}")` macro with Rust's `format` syntax. e.g. `(%format "Hello {}, age {age:0>2}" "Sam" :age 9)` - [x] Add `(%raw ...)` macro which takes a string and leaves it unchanged in the final output. - - [ ] `(%formatter/text)` can take any other source code, for which it just embeds the expanded code (plain-text formatter). 
+ - [ ] `(%formatter/text ...)` can take any seam (sexp) source code, for which it just embeds the expanded code (plain-text formatter). - [ ] `(%formatter/html ...)` etc. which call the respective available formatters. - [ ] Implement lexical scope by letting macros store a copy of the scope they were defined in (or a reference?). - [x] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string. - [x] Variadic arguments via `&rest` syntax. - [ ] Type-checking facilities for user macros. - [x] `%list` macro which expands from `(%list %a %b %c)` to `( %a %b %c )` but *without* calling `%a` as a macro with `%b` and `%c` as argument. - - [ ] `%for`-loop macro, iterating over `%list`s. - - [ ] `%glob` which returns a list of files/directories matching a glob. + - [x] `%for`-loop macro, iterating over `%list`s. + - [x] `%glob` which returns a list of files/directories matching a glob. - [ ] `%markdown` renders Markdown given to it as `%raw` html-string. - [ ] Add keyword macro arguments. - [ ] Caching or checking time-stamps as to not regenerate unmodified source files. diff --git a/crates/seam/Cargo.toml b/crates/seam/Cargo.toml @@ -5,7 +5,7 @@ keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"] license-file = "../../LICENSE" readme = "../../README.md" homepage = "https://git.knutsen.co/seam" -version = "0.3.2" +version = "0.4.0" authors = ["Demonstrandum <samuel@knutsen.co>"] edition = "2021" @@ -22,10 +22,13 @@ name = "seam" path = "src/bin.rs" [dependencies] -seam_argparse_proc_macro = { version = "0.1.0", path = "../seam_argparse_proc_macro" } +seam_argparse_proc_macro = { version = "0.1.1", path = "../seam_argparse_proc_macro" } colored = "2.1" chrono = "0.4" unicode-width = "0.2.0" descape = "2.0.3" formatx = "0.2.2" regex = "1.10.5" +glob = "0.3.1" +markdown = "1.0.0-alpha.21" +yaml-rust2 = "0.9.0" diff --git a/crates/seam/src/parse/expander.rs b/crates/seam/src/parse/expander.rs @@ -1,7 +1,6 @@ use super::parser::{Node, ParseNode, ParseTree, Parser}; use super::tokens::Site; -use std::fmt::Display; use std::{ fmt, cell::RefCell, @@ -17,7 +16,9 @@ use std::{ use colored::*; use formatx; +use glob::glob; use unicode_width::UnicodeWidthStr; +use markdown; // proc macros for generating macros. use seam_argparse_proc_macro::arguments; @@ -377,8 +378,17 @@ impl<'a> Expander<'a> { .iter() .map(|(kw, _)| format!("`:{}`", kw)) .collect(); - let known_keywords = known_keywords.join(", "); let excess_keywords = excess_keywords.join(", "); + if known_keywords.is_empty() { + return Err(ExpansionError( + format!( + "This macro does not expect any keyword arguments, however the following were provided: {}", + excess_keywords, + ), + some_node.owned_site(), + )); + } + let known_keywords = known_keywords.join(", "); return Err(ExpansionError( format!(concat!( "Unknown excess keywords provided: {};", @@ -497,7 +507,7 @@ impl<'a> Expander<'a> { }; // Open file, and parse contents! - let include_error = |error: Box<dyn Display>| ExpansionError( + let include_error = |error: Box<dyn fmt::Display>| ExpansionError( format!("{}", error), site.to_owned()); let mut parser: Result<Parser, ExpansionError> = Err( include_error(Box::new("No path tested."))); @@ -566,11 +576,11 @@ impl<'a> Expander<'a> { }; // Open file, and read contents! 
- let embed_error = |error: Box<dyn Display>| ExpansionError( + let embed_error = |error: Box<dyn fmt::Display>| ExpansionError( format!("{}", error), site.to_owned()); let mut value: Result<String, ExpansionError> = Err( embed_error(Box::new("No path tested."))); - // Try all include directories until one is succesful. + // Try all include directories until one is successful. for include_dir in &self.includes { let path = include_dir.join(path); value = std::fs::read_to_string(path) @@ -587,6 +597,141 @@ impl<'a> Expander<'a> { ])) } + /// The `(%markdown ...)` macro parses both the fenced `--- ... ---` metadata at the + /// top of the file (which it expands to `%define`s), and converts the rest of the + /// markdown file into a raw-string containing the converted plain HTML. + fn expand_markdown_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let (_parser, args) = arguments! { [&params] + mandatory(1): string, + optional("only"): literal["frontmatter", "content"], + optional("separator"): literal, + }?; + // Parse the makdown content only, the frontmatter only, or do both. + #[derive(Clone, Copy, PartialEq, Eq)] + enum Only { Frontmatter, Content, Both } + // Extract arguments and options. + let contents = args.number.1.value; + let only = args.only.map_or(Only::Both, |option| match option.value.as_ref() { + "frontmatter" => Only::Frontmatter, + "content" => Only::Content, + _ => unreachable!(), + }); + // Default to using the '/' namespace separator for frontmatter definitions. + let sep = args.separator.map_or(String::from("/"), |sep| sep.value); + // Live dangerously / trust the author: + let danger = markdown::CompileOptions { + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..markdown::CompileOptions::default() + }; + // Flavour options: + let flavour = markdown::ParseOptions { + gfm_strikethrough_single_tilde: false, + math_text_single_dollar: true, + constructs: markdown::Constructs { + frontmatter: true, + gfm_table: true, + gfm_task_list_item: true, + ..markdown::Constructs::default() + }, + ..markdown::ParseOptions::default() + }; + // Options. + let options = markdown::Options { parse: flavour, compile: danger, }; + + // How to convert to HTML. + let to_html = | | -> Result<ParseTree, _> { + // Convert to HTML. + let html = match markdown::to_html_with_options(contents.as_ref(), &options) { + Ok(html) => html, + Err(err) => return Err(ExpansionError( + format!("Failed to render markdown: {}", err), + args.number.1.site.to_owned(), + )) + }; + // Return the raw html. + Ok(Box::new([ + ParseNode::Raw(Node { + value: html, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }), + ])) + }; + + // How to extract front-matter. + let extract_frontmatter = | | -> Result<(), _> { + use markdown::mdast; + let ast = match markdown::to_mdast(contents.as_ref(), &options.parse) { + Ok(ast) => ast, + Err(err) => return Err(ExpansionError( + format!("Failed to render markdown: {}", err), + args.number.1.site.to_owned(), + )) + }; + let mdast::Node::Root(root) = ast else { unreachable!() }; + let root = root.children; + let root: &[mdast::Node] = root.as_ref(); + match root { + [mdast::Node::Yaml(mdast::Yaml { value: yaml, .. }), ..] => { + // Parse the YAML and convert it into macro definitions. + let _ = expand_yaml(self, yaml, &sep, node.site())?; + Ok(()) + }, + [mdast::Node::Toml(mdast::Toml { value: toml, .. }), ..] 
=> { + // Parse the TOML and convert it into macro definitions. + let _ = expand_toml(self, toml, &sep, node.site())?; + Ok(()) + }, + _ => return Err(ExpansionError::new( + "This markdown does not contain any frontmatter.", + &args.number.1.site, + )) + } + }; + + match only { + Only::Frontmatter => { + extract_frontmatter()?; + Ok(Box::new([])) + }, + Only::Content => to_html(), + Only::Both => { + // Ignore any errors if no frontmatter exists. + let _ = extract_frontmatter(); + to_html() + }, + } + } + + fn expand_yaml_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let (_parser, args) = arguments! { [&params] + mandatory(1): string, + optional("separator"): literal, + }?; + let yaml = args.number.1.value; + let sep = args.separator.map_or(String::from("/"), |sep| sep.value); + + expand_yaml(self, &yaml, &sep, node.site()) + } + + fn expand_toml_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let (_parser, args) = arguments! { [&params] + mandatory(1): string, + optional("separator"): literal, + }?; + let yaml = args.number.1.value; + let sep = args.separator.map_or(String::from("/"), |sep| sep.value); + + expand_toml(self, &yaml, &sep, node.site()) + } + fn expand_date_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) -> Result<ParseTree<'a>, ExpansionError<'a>> { let params = self.expand_nodes(params)?; @@ -753,6 +898,81 @@ impl<'a> Expander<'a> { Ok(args.cloned().collect()) } + fn expand_for_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let (_parser, args) = arguments! { [&params] + mandatory(1): any, + mandatory(2): symbol["in"], + mandatory(3): list, + rest: any, + }?; + let it = args.number.1; + let list = args.number.3; + let list = self.expand_nodes(list.into_boxed_slice())?; + let body = args.rest.into_boxed_slice(); + + let context = self.clone(); + let mut expanded = Vec::with_capacity(list.len()); + for item in list { + context.bind(&it, &item)?; + let evaluated = context.expand_nodes(body.clone())?; + expanded.extend(evaluated); + } + + Ok(expanded.into_boxed_slice()) + } + + fn expand_glob_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let (_parser, args) = arguments! 
{ [&params] + mandatory(1): literal, + optional("type"): literal["file", "directory", "any"] + }?; + + #[derive(Clone, Copy, PartialEq, Eq)] + enum PathTypes { File, Dir, Any } + + let path_types = args.r#type.map(|typ| match typ.value.as_ref() { + "file" => PathTypes::File, + "directory" => PathTypes::Dir, + "any" => PathTypes::Any, + _ => unreachable!(), + }).unwrap_or(PathTypes::Any); + + let pattern: &str = args.number.1.value.as_ref(); + let paths = match glob(pattern) { + Ok(paths) => paths, + Err(err) => return Err(ExpansionError( + format!("Failed to read glob pattern: {}", err), + args.number.1.site.to_owned(), + )), + }; + + let mut expanded = vec![]; + for path in paths { + let path = match path { + Ok(path) => path, + Err(err) => return Err(ExpansionError( + format!("glob failed: {}", err), + args.number.1.site.to_owned(), + )), + }; + let meta = std::fs::metadata(&path).unwrap(); + match path_types { + PathTypes::File if !meta.is_file() => continue, + PathTypes::Dir if !meta.is_dir() => continue, + _ => {}, + } + expanded.push(ParseNode::String(Node { + value: path.to_string_lossy().to_string(), + site: args.number.1.site.to_owned(), + leading_whitespace: args.number.1.leading_whitespace.to_owned(), + })); + } + Ok(expanded.into_boxed_slice()) + } + fn expand_raw_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) -> Result<ParseTree<'a>, ExpansionError<'a>> { let mut builder = String::new(); @@ -819,6 +1039,27 @@ impl<'a> Expander<'a> { ])) } + fn expand_concat_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; // Eager. + let (_parser, args) = arguments! { [&params] + rest: literal, + }?; + + let joined: String = args.rest.iter().fold + (String::new(), + |acc, x| acc + x.value.as_ref() + ); + + Ok(Box::new([ + ParseNode::String(Node { + value: joined, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }) + ])) + } + fn expand_map_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) -> Result<ParseTree<'a>, ExpansionError<'a>> { let params = self.expand_nodes(params)?; // Eager. 
@@ -1006,8 +1247,15 @@ impl<'a> Expander<'a> { "include" => self.expand_include_macro(node, params), "embed" => self.expand_embed_macro(node, params), "namespace" => self.expand_namespace_macro(node, params), + "markdown" => self.expand_markdown_macro(node, params), + "yaml" => self.expand_yaml_macro(node, params), + "json" => self.expand_yaml_macro(node, params), + "toml" => self.expand_toml_macro(node, params), + "glob" => self.expand_glob_macro(node, params), + "for" => self.expand_for_macro(node, params), "date" => self.expand_date_macro(node, params), "join" => self.expand_join_macro(node, params), + "concat" => self.expand_concat_macro(node, params), "map" => self.expand_map_macro(node, params), "filter" => self.expand_filter_macro(node, params), "splat" => self.expand_splat_macro(node, params), @@ -1122,3 +1370,250 @@ impl<'a> Expander<'a> { Ok(expanded) } } + +/// For example, the YAML below, +/// ```yaml +/// a: 2 +/// b: hello +/// nested: +/// hello: world +/// array: +/// - aa: 0 +/// bb: 1 +/// - aa: 2 +/// bb: 3 +/// ``` +/// evaluates to the following variables: +/// ```text +/// (%yaml "...") #=> (:a 2 :b hello (:hello world :array ((:aa 0 :bb 1) (:aa 2 :bb 3))))) +/// a #=> 2 +/// b #=> "hello" +/// nested #=> (:hello world :array ((:aa 0 :bb 1) (:aa 2 :bb 3))) +/// nested/hello #=> world +/// nested/array #=> ((:aa 0 :bb 1) (:aa 2 :bb 3)) +/// nested/array/0 #=> (:aa 0 :bb 1) +/// nested/array/1 #=> (:aa 2 :bb 3) +/// nested/array/0/aa #=> 0 +/// nested/array/0/bb #=> 1 +/// nested/array/1/aa #=> 2 +/// nested/array/1/bb #=> 3 +/// ``` +fn expand_yaml<'a>(context: &Expander<'a>, text: &str, sep: &str, site: &Site<'a>) -> Result<ParseTree<'a>, ExpansionError<'a>> { + use yaml_rust2 as yaml; + + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + enum Mode { Map, Seq, } + + struct EventSink<'a, 'b> { + /// The macro expansion context. + context: &'b Expander<'a>, + /// A variable name if the YAML parser is currently parsing + /// the assignment of an item in a map. + defining: Option<String>, + /// The collection of nodes which eventually get assigned + /// to a `ParseNode::List` after a map or array is parsed. + nodes: Vec<ParseNode<'a>>, + parent: Vec<Vec<ParseNode<'a>>>, + /// The sequence of qualifiers for a nested name definition. + prefix: Vec<String>, + /// The namespace separator (e.g. `/` or `.`). + sep: String, + /// Whether we're parsing a map or a sequence (array). + mode: Option<Mode>, + modes: Vec<Mode>, + /// The site of the original YAML-parsing macro. + site: Site<'a>, + } + + impl<'a, 'b> EventSink<'a, 'b> { + fn qualified(&self, name: &str) -> String { + if self.prefix.is_empty() { + name.to_owned() + } else { + let prefix = self.prefix.join(&self.sep); + format!("{}{}{}", prefix, self.sep, name) + } + } + } + + impl<'a, 'b> yaml::parser::EventReceiver for EventSink<'a, 'b> { + fn on_event(&mut self, event: yaml::Event) { + /* + eprintln!("---"); + eprintln!("event: {:?}", event); + eprintln!("mode: {:?}", self.mode); + eprintln!("defn: {:?}", self.defining); + eprintln!("prefix: {:?}", self.prefix); + eprintln!("nodes: [{}]", self.nodes.iter().map(|node| node.to_string()).collect::<Vec<String>>().join("; ")); + eprintln!("parent: {:?}", self.parent); + */ + let the_dreaded_rparen = crate::parse::tokens::Token::new( + crate::parse::tokens::Kind::RParen, + ")", "", self.site.clone() + ); + match event { + // Either defining a new variable or setting a variable to a string. + yaml::Event::Scalar(ref value, ..) 
=> { + let mut string = ParseNode::String(Node { + value: value.clone(), + site: self.site.clone(), + leading_whitespace: String::from(" "), + }); + match self.defining { + Some(ref name) => { + // Define a variable under `name` with `value`. + let qualified_name = self.qualified(name); + self.context.insert_variable(qualified_name.clone(), Rc::new(Macro { + name: qualified_name, + params: Box::new([]), + body: Box::new([string.clone()]), + })); + match self.mode { + Some(Mode::Map) => { + // Wait for next name. + let keyword = name.clone(); + // Push keyword attribute. + let attr = ParseNode::Attribute { + keyword, + node: Box::new(string), + site: self.site.clone(), + leading_whitespace: String::from(if self.nodes.is_empty() { + "" + } else { + " " + }), + }; + self.nodes.push(attr); + self.defining = None; + }, + Some(Mode::Seq) => { + // Push list item. + if self.nodes.is_empty() { + string.set_leading_whitespace(String::new()); + } + self.nodes.push(string); + self.defining = Some(format!("{}", self.nodes.len())); + }, + None => panic!("cannot be defining an item outside of a map or sequence.") + } + }, + None => match self.mode { + // Otherwise, we are defining a new variable under this name. + Some(Mode::Map) => self.defining = Some(value.clone()), + Some(Mode::Seq) => panic!("seq is always defining something."), + None => { + // Push item. + if self.nodes.is_empty() { + string.set_leading_whitespace(String::new()); + } + self.nodes.push(string); + }, + } + } + }, + // Start parsing a YAML map. + yaml::Event::MappingStart(..) => { + if let Some(ref defining) = self.defining { + self.prefix.push(defining.clone()); + } + self.defining = None; + self.parent.push(self.nodes.clone()); + self.nodes = Vec::new(); + if let Some(mode) = self.mode { + self.modes.push(mode); + } + self.mode = Some(Mode::Map); + }, + // Start parsing a YAML sequence. + yaml::Event::SequenceStart(..) => { + if let Some(ref defining) = self.defining { + self.prefix.push(defining.clone()); + } + self.defining = Some(String::from("0")); + self.parent.push(self.nodes.clone()); + self.nodes = Vec::new(); + if let Some(mode) = self.mode { + self.modes.push(mode); + } + self.mode = Some(Mode::Seq); + }, + // Assign the built-up map or sequence. + yaml::Event::MappingEnd | yaml::Event::SequenceEnd => { + self.mode = self.modes.pop(); // Revert to previous mode. + let nodes = self.nodes.clone(); // Nodes in the list. + self.nodes = self.parent.pop().unwrap_or(Vec::new()); // Regain previous collection of nodes. + self.defining = match self.mode { + Some(Mode::Seq) => Some(format!("{}", self.nodes.len() + 1)), + Some(Mode::Map) | None => None + }; + let name = self.prefix.pop(); // The name of this map or sequence. + // Construct a `ParseNode::List` containing the collected nodes. + let list = ParseNode::List { + nodes: nodes.into_boxed_slice(), + site: self.site.clone(), + end_token: the_dreaded_rparen, + leading_whitespace: String::from(if self.nodes.is_empty() { + "" + } else { + " " + }), + }; + // Handle inserting map/seq list under a qualified variable into the context. + match name { + Some(ref name) => { + let name = self.qualified(name); + self.context.insert_variable(name.clone(), Rc::new(Macro { + name, + params: Box::new([]), + body: Box::new([list.clone()]), + })); + }, + None => {}, + }; + // Handle growing the current nodes with the map/seq. 
+ self.nodes.push(match self.mode { + Some(Mode::Map) => { + let leading_whitespace = list.leading_whitespace().to_owned(); + let mut list = list; + list.set_leading_whitespace(String::from(" ")); + ParseNode::Attribute { + keyword: name.clone().expect("must always be defining during a map context."), + node: Box::new(list), + site: self.site.clone(), + leading_whitespace, + } + }, + Some(Mode::Seq) | None => list, + }); + }, + _ => {}, + } + } + } + + let mut sink = EventSink { + context, + defining: None, + prefix: Vec::new(), + sep: sep.to_string(), + mode: None, + modes: Vec::new(), + nodes: Vec::new(), + parent: Vec::new(), + site: site.clone(), + }; + + yaml::parser::Parser::new_from_str(text) + .load(&mut sink, false) + .map_err(|err| ExpansionError( + format!("Failed to parse yaml: {}", err), + site.to_owned() + ))?; + + Ok(sink.nodes.into_boxed_slice()) +} + +/// See [`expand_yaml`], but for the TOML configuration language instead. +fn expand_toml<'a>(context: &Expander<'a>, text: &str, sep: &str, site: &Site<'a>) -> Result<ParseTree<'a>, ExpansionError<'a>> { + Ok(Box::new([])) +} diff --git a/crates/seam/src/parse/macros.rs b/crates/seam/src/parse/macros.rs @@ -21,7 +21,7 @@ impl<'tree> ArgPredicate<'tree> { match self { Self::Exactly(value) => if node.value == *value { Ok(()) } else { Err(ExpansionError( - format!("value must be equal to `{}`", value), + format!("value must be equal to `{}'", value), node.site.to_owned(), )) }, @@ -81,7 +81,12 @@ fn check_all<'tree>(preds: &Vec<ArgPredicate<'tree>>, node: &ParseNode<'tree>) - if issues.is_empty() { return Ok(()); } // Amalgamate errors. let mut error = String::new(); - let _ = writeln!(error, "This argument's value did not satisfy one of the following:"); + if let Some(literal) = node.atomic() { + let _ = writeln!(error, "This argument's value (`{}') did not satisfy one of the following:", literal.value); + } else { + let _ = writeln!(error, "This argument's value did not satisfy one of the following:"); + + } for (i, issue) in issues.iter().enumerate() { let _ = write!(error, " * {}", issue.0); if i != issues.len() - 1 { @@ -127,7 +132,7 @@ impl<'tree> ArgType<'tree> { }, ParseNode::List { nodes, .. } => match self { List(arg_types) => { - if nodes.len() != arg_types.len() { + if arg_types.len() != 0 && nodes.len() != arg_types.len() { return Err(ExpansionError( format!("Unexpected number of items in list, expected {} items, got {}.", arg_types.len(), nodes.len()), diff --git a/crates/seam/src/parse/parser.rs b/crates/seam/src/parse/parser.rs @@ -639,33 +639,21 @@ impl<'a> SearchTree<'a> for ParseTree<'a> { } /// Pretty printing for parse nodes. -#[cfg(feature="debug")] impl<'a> fmt::Display for ParseNode<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ParseNode::Symbol(node) - | ParseNode::Number(node) => write!(f, "{}", &node.value), - ParseNode::String(node) => { - if node.value.trim().is_empty() { - write!(f, "") - } else { - write!(f, "\"{}\"", &node.value) + | ParseNode::Number(node) => write!(f, "{}{}", node.leading_whitespace, node.value), + ParseNode::String(node) + | ParseNode::Raw(node) => write!(f, "{}{:?}", node.leading_whitespace, node.value), + ParseNode::Attribute { keyword, node, leading_whitespace, .. } => + write!(f, "{}:{}{}", leading_whitespace, keyword, &*node), + ParseNode::List { nodes, leading_whitespace, end_token, .. } => { + write!(f, "{}(", leading_whitespace)?; + for node in nodes { + write!(f, "{}", node)?; } - }, - ParseNode::Attribute { keyword, node, .. 
} => write!(f, ":{} {}", - &keyword, &*node), - ParseNode::List { nodes, .. } => if nodes.len() == 0 { - write!(f, "()") - } else if let [single] = &**nodes { - write!(f, "({})", single) - } else { - write!(f, "({}{})", nodes[0], - nodes[1..].iter().fold(String::new(), |acc, elem| { - let nested = elem.to_string().split('\n') - .fold(String::new(), |acc, e| - acc + "\n " + &e); - acc + &nested - })) + write!(f, "{})", end_token.leading_whitespace) } } } diff --git a/crates/seam/src/parse/tokens.rs b/crates/seam/src/parse/tokens.rs @@ -44,7 +44,7 @@ impl<'a> Site<'a> { pub fn uuid(&self) -> String { let mut hasher = DefaultHasher::new(); - self.source.hash(&mut hasher); + self.hash(&mut hasher); let hash = hasher.finish(); format!("{}-{}", hash, self.bytes_from_start) } @@ -98,6 +98,16 @@ impl<'a> Site<'a> { } } +impl<'a> Hash for Site<'a> { + fn hash<H: Hasher>(&self, state: &mut H) { + self.source.hash(state); + self.line.hash(state); + self.bytes_from_start.hash(state); + self.bytes_from_start_of_line.hash(state); + self.bytes_span.hash(state); + } +} + impl<'a> Display for Site<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "(")?; diff --git a/crates/seam_argparse_proc_macro/Cargo.toml b/crates/seam_argparse_proc_macro/Cargo.toml @@ -4,7 +4,7 @@ description = "proc-macro crate for seam `arguments!` macro." license-file = "../../LICENSE" authors = ["Demonstrandum <samuel@knutsen.co>"] edition = "2021" -version = "0.1.0" +version = "0.1.1" [lib] proc-macro = true diff --git a/crates/seam_argparse_proc_macro/src/lib.rs b/crates/seam_argparse_proc_macro/src/lib.rs @@ -24,7 +24,7 @@ enum ParseState { PositionPattern(PositionTypes), //< pattern for position or name. } -#[derive(Clone)] +#[derive(Clone, Copy, PartialEq, Eq)] enum ArgumentKind { Literal, String, @@ -51,35 +51,35 @@ struct ArgumentStructTypes { /// Macro that generates an argument parser and builds a custom struct /// holding provided arguments, given a schema and the list of arguments. -/// Example: -/// ``` -/// let (parser, args) = arguments! { [&params] -/// mandatory(1..=3): literal, -/// mandatory(4): number fn(n: ParseNode) { -/// let n = extract_number(n)?; -/// let Ok(n): u32 = n.value.parse() else { -/// return Err("Argument must be an integer."); -/// } -/// if n % 2 == 0 { -/// Ok(()) -/// } else { -/// Err("Integer must be even.") -/// } -/// }, -/// optional("trailing"): literal["true", "false"], -/// rest: number -/// }?; -/// println!("first arg {:?}", args.number.1); // a literal (Node<'a>). -/// println!("second arg {:?}", args.number.2); // a literal (Node<'a>). -/// println!("third arg {:?}", args.number.3); // a literal (Node<'a>). -/// println!("fourth arg {:?}", args.number.4); // an even integer (Node<'a>). -/// if let Some(named) = args.trailing { -/// println!("named arg {:?}", named); // the literal "true" or "false". -/// } -/// for arg in args.rest { -/// println!("trailing arg: {:?}", arg); // trailing number args. -/// } -/// ``` +/// ### Example +/// ``` +/// let (parser, args) = arguments! { [&params] +/// mandatory(1..=3): literal, +/// mandatory(4): number fn(n: ParseNode) { +/// let n = extract_number(n)?; +/// let Ok(n): u32 = n.value.parse() else { +/// return Err("Argument must be an integer."); +/// } +/// if n % 2 == 0 { +/// Ok(()) +/// } else { +/// Err("Integer must be even.") +/// } +/// }, +/// optional("trailing"): literal["true", "false"], +/// rest: number +/// }?; +/// println!("first arg {:?}", args.number.1); // a literal (Node<'a>). 
+/// println!("second arg {:?}", args.number.2); // a literal (Node<'a>). +/// println!("third arg {:?}", args.number.3); // a literal (Node<'a>). +/// println!("fourth arg {:?}", args.number.4); // an even integer (Node<'a>). +/// if let Some(named) = args.trailing { +/// println!("named arg {:?}", named); // the literal "true" or "false". +/// } +/// for arg in args.rest { +/// println!("trailing arg: {:?}", arg); // trailing number args. +/// } +/// ``` #[proc_macro] pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { let stream: TokenStream = stream.into(); @@ -261,7 +261,7 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { let mut named_values: Vec<TokenStream> = vec![]; for (name, props) in arg_struct.named.iter() { let rust_type = props.rust_type.clone(); - let variable: proc_macro2::TokenStream = name.parse().unwrap(); + let variable: proc_macro2::TokenStream = format!("r#{}", name).parse().unwrap(); named_types.push(rust_type); named_arguments.push(variable); match props.position_type { @@ -321,17 +321,28 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { // Generate code for extracting the trailing arguments. let rest_rust_type = arg_struct.rest.rust_type; - let trailing_arguments = quote! { - { - parser.trailing - .iter() - .map(|arg| { - let arg: crate::parse::parser::ParseNode = (*arg).clone(); - let retrieved: #rest_rust_type = arg.try_into().expect("node type-checked but unwrap failed"); - retrieved - }) - .collect() + let has_rest_capture = arg_struct.rest.kind != ArgumentKind::None; + let trailing_arguments = if has_rest_capture { + quote! { + { + parser.trailing + .iter() + .map(|arg| { + let arg: crate::parse::parser::ParseNode = (*arg).clone(); + let retrieved: #rest_rust_type = arg.try_into().expect("node type-checked but unwrap failed"); + retrieved + }) + .collect() + } } + } else { + quote! { () } + }; + + let rest_struct_decl = if has_rest_capture { + quote! {rest: Vec<#rest_rust_type>} + } else { + quote! {rest: ()} }; // Assemble code that builds argument parser context and argument struct. @@ -344,7 +355,7 @@ pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { struct #struct_name<'a> { number: (#(#tuple_types),*,), #(#named_arguments: #named_types,)* - rest: Vec<#rest_rust_type>, + #rest_struct_decl } let parser_result = crate::parse::macros::ArgParser::new(rules, &node, #params); match parser_result { @@ -459,7 +470,7 @@ fn parse_argument_type(stream: &mut Peekable<IntoIter>, position_type: PositionT "symbol" => (AK::Symbol, quote! { crate::parse::parser::Node<'a> }, quote! { crate::parse::macros::ArgType::Symbol }), "number" => (AK::Number, quote! { crate::parse::parser::Node<'a> }, quote! { crate::parse::macros::ArgType::Number }), "symbolic" => (AK::Symbolic, quote! { crate::parse::parser::Node<'a> }, quote! { crate::parse::macros::ArgType::Symbolic }), - "list" => (AK::List, quote! { Vec<crate::parse::parser::Node<'a>> }, quote! { crate::parse::macros::ArgType::List }), + "list" => (AK::List, quote! { Vec<crate::parse::parser::ParseNode<'a>> }, quote! { crate::parse::macros::ArgType::List }), "any" => (AK::Any, quote! { crate::parse::parser::ParseNode<'a> }, quote! { crate::parse::macros::ArgType::Any }), _ => panic!("Invalid argument type: `{}`", ident), },