seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam
Log | Files | Refs | README | LICENSE

commit ceb658bd34143bbebbd6879cd21929301eeee6ae
Author: Demonstrandum <moi@knutsen.co>
Date:   Sun, 21 Jun 2020 04:25:17 +0100

Initial implementation.

Diffstat:
A.gitignore | 1+
ACargo.lock | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ACargo.toml | 19+++++++++++++++++++
AREADME.md | 28++++++++++++++++++++++++++++
Asamples/html-document-1.sex | 10++++++++++
Asamples/json-example.sex | 27+++++++++++++++++++++++++++
Asamples/xml-example-1.sex | 7+++++++
Asamples/xml-example-2.sex | 26++++++++++++++++++++++++++
Asrc/assemble/html.rs | 123+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/assemble/mod.rs | 5+++++
Asrc/bin.rs | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/lib.rs | 27+++++++++++++++++++++++++++
Asrc/parse/expander.rs | 0
Asrc/parse/lexer.rs | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse/mod.rs | 7+++++++
Asrc/parse/parser.rs | 148+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse/tokens.rs | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest.html | 21+++++++++++++++++++++
Atest.sex | 15+++++++++++++++
19 files changed, 803 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock @@ -0,0 +1,73 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "colored" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4ffc801dacf156c5854b9df4f425a626539c3a6ef7893cc0c5084a23f0b6c59" +dependencies = [ + "atty", + "lazy_static", + "winapi", +] + +[[package]] +name = "hermit-abi" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9586eedd4ce6b3c498bc3b4dd92fc9f11166aa908a914071953768066c67909" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" + +[[package]] +name = "seam" +version = "0.1.0" +dependencies = [ + "colored", +] + +[[package]] +name = "winapi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "seam" +description = "Symbolic Expressions As Markup." +keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"] +version = "0.1.0" +authors = ["Demonstrandum <moi@knutsen.co>"] +edition = "2018" + + +[lib] +name = "seam" +path = "src/lib.rs" + +[[bin]] +name = "seam" +path = "src/bin.rs" + +[dependencies] +colored = "1.8" diff --git a/README.md b/README.md @@ -0,0 +1,28 @@ +# SEAM + +> **S**ymbolic **E**xpressions **A**s **M**arkup. + +## Why + +Because all markup is terrible, especially XML/SGML and derivatives. + +But mainly, for easier static markup code generation, such as with +macros, code includes and such. + + +## Try it out + +Mainly this should be used as a library, such as from within a server, +generating HTML (or any other supported markup) before it is served to the +client. + +### Using The Binary + +(Providing you have cloned this repo, and `cd`'d into it) + +```console +cargo run test.sex --html > test.html +``` + +`test.sex` contains your symbolic-expressions, which are used to generate +HTML, saved in `test.html`. diff --git a/samples/html-document-1.sex b/samples/html-document-1.sex @@ -0,0 +1,10 @@ +(!DOCTYPE html) +(html +(head + (title Example HTML Document)) +(body + (p :id hello Hello, World!) + (p something text...) + (h1 "A (big) Header!") + (p Yet some more (span :style "color: red" text)))) + diff --git a/samples/json-example.sex b/samples/json-example.sex @@ -0,0 +1,27 @@ +(uuid "abc-123") +(students [ ;; [...] means an array. + ( + :id 0x000020f1 ;; Style 1 + :name "Tohma" + :gender :male + ) + ;; Parens loosely indicate an object grouping. 
+ ( + (id 0o00023721) ;; Style 2 + (name "Julia") + (gender :female) + ) +] +(techers [ + ((id 123 + (name "Bellers") + (gender :female)))]) + +;; One can imagine that this document is implicitly +;; wrapped in a pair of parenthesis. +;; (This indicates it is an object grouping) +;; If we explicitly use square brackets [...] +;; at top-level, then that's a replacement for the +;; implicit parenthesis. i.e. we've explicitly stated +;; we want an array instead of an object (default). + diff --git a/samples/xml-example-1.sex b/samples/xml-example-1.sex @@ -0,0 +1,7 @@ +(message :status urgernt + (to Tove) + (from Jani) + (heading A reminder \(Again!\) for you) + (body Don't forget me this weekend!)) + + diff --git a/samples/xml-example-2.sex b/samples/xml-example-2.sex @@ -0,0 +1,26 @@ +(breakfast_menu + (food + (name Belgian\(-style\) Waffles) + (price $5.95) + (description + Two of our famous Belgian Waffles + with plenty of real maple syrup.) + (calories 650)) + (food + (name "French(-style) Toast") + (price $4.50) + (description """ + Thick slices made from our homemade + sourdough bread. + """) + (calories 600)) + (food + (name "Home Breakfast") + (price "$6.95") + (desciption + Two eggs, bacon or sausage, toast, + and our ever-popular hash browns.) + (calories "950"))) + + + diff --git a/src/assemble/html.rs b/src/assemble/html.rs @@ -0,0 +1,123 @@ +//! Assembles an expanded tree into valid HTML. +use super::Documentise; +use crate::parse::parser::{ParseNode, ParseTree}; + +use std::fmt::{self, Display}; + +#[derive(Debug, Clone)] +pub struct HTMLFormatter { + pub tree : ParseTree +} + +impl HTMLFormatter { + pub fn new(tree : ParseTree) -> Self { + Self { tree } + } +} + +pub const DEFAULT : &str = + "<!DOCTYPE>\n\ + <html>\n\ + <head></head>\n\ + <body></body>\n\ + </html>"; + +impl Documentise for HTMLFormatter { + fn document(&self) -> String { + // Check if <!DOCTYPE html> exists. 
+ let mut doc = String::new(); + if self.tree.is_empty() { + return String::from(DEFAULT); + } + let mut current_node = &self.tree[0]; + let mut has_declaration = false; + + if let ParseNode::List(list) = &current_node { + if let Some(ParseNode::Symbol(declaration)) = &list.get(0) { + if declaration.value.to_lowercase() == "!doctype" { + has_declaration = true; + } + } + } + + if has_declaration { + current_node = &self.tree[1]; + } else { + doc += "<!DOCTYPE html>" + } + // Check if <html></html> root object exists. + // Check if head exits, if not, make an empty one. + // Check if body exists, if not, make it, and put everything + // in there. + + doc += &self.to_string(); + + doc + } +} + + +// TODO: Convert special characters to HTML compatible ones. +// e.g. +// < => &lt; +// > => &gt; +// & => &amp; +// " => &quot; +// ! => &excl; +// etc. + +/// Converting the tree to an HTML string. +impl Display for HTMLFormatter { + fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { + for node in &self.tree { + match node { + ParseNode::Symbol(node) => write!(f, " {}", node.value)?, + ParseNode::Number(node) => write!(f, " {}", node.value)?, + ParseNode::String(node) => write!(f, " {}", node.value)?, + ParseNode::List(list) => { + let head = list.first(); + let mut tag = ""; + if let Some(head_node) = head { + if let ParseNode::Symbol(head_symbol) = head_node { + tag = &head_symbol.value; + write!(f, "<{}", tag)?; + } else { + // Error, tags can only have symbol values. + } + } else { + // Error, empty tags not supported. + } + + let mut rest = &list[1..]; + + // Declarations behave differently. + if tag.as_bytes()[0] == '!' as u8 { + // TODO: Following can only be symbols. 
+ while !rest.is_empty() { + write!(f, " {}", rest[0])?; + rest = &rest[1..]; + } + write!(f, ">")?; + continue; + } + + while let Some(ParseNode::Attribute(attr)) = rest.first() { + if let Some(atom) = (*attr.node).atomic() { + write!(f, " {}=\"{}\"", attr.keyword, atom.value)?; + rest = &rest[1..]; + } else { + // Error! Cannot be non atomic. + } + } + writeln!(f, ">")?; + + let html_fmt = HTMLFormatter::new(rest.to_owned()); + writeln!(f, "{}", html_fmt)?; + write!(f, "</{}>", tag)?; + }, + _ => write!(f, "hi")?, + } + } + write!(f, "") + } +} diff --git a/src/assemble/mod.rs b/src/assemble/mod.rs @@ -0,0 +1,5 @@ +pub trait Documentise { + fn document(&self) -> String; +} + +pub mod html; diff --git a/src/bin.rs b/src/bin.rs @@ -0,0 +1,66 @@ +use seam; +use seam::assemble::Documentise; + +use std::env; +use std::path::PathBuf; +use std::error::Error; + +use colored::*; + +fn argument_fatal(msg : &str) -> ! { + eprintln!("{} {}", + format!("[{}]", "**".red()).white().bold(), + msg.bold()); + std::process::exit(1) +} + +const SUPPORTED_TARGETS : [&str; 1] = ["html"]; + +fn main() -> Result<(), Box<dyn Error>> { + let (major, minor, tiny) = seam::VERSION; + eprintln!("{}", format!("SEAM v{}.{}.{}", + major, minor, tiny).bold()); + + let mut args = env::args(); + args.next(); // Discard. 
+ + let mut files = Vec::new(); + let mut target = ""; + + for arg in args { + if let Some(opt) = arg.split("--").nth(1) { + if SUPPORTED_TARGETS.contains(&opt) { + target = Box::leak(opt.to_owned().into_boxed_str()); + } + continue; + } + let path = PathBuf::from(&arg); + if path.exists() { + eprintln!("Reading file `{}'.", &path.display()); + files.push(path); + } + } + + if files.is_empty() { + argument_fatal("No input files given."); + } + if target.is_empty() { + argument_fatal("No such target exists / no target given."); + } + + for file in files { + let tree = seam::parse_file(&file)?; + /*eprintln!("{}", &tree + .iter().fold(String::new(), + |acc, s| acc + "\n" + &s.to_string()));*/ + if target == "html" { + let fmt = seam::assemble::html::HTMLFormatter::new(tree); + let result = fmt.document(); + println!("{}", result); + } + } + + eprintln!("All files read and converted."); + + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs @@ -0,0 +1,27 @@ +pub mod parse; +pub mod assemble; + +use parse::{parser, lexer}; + +use std::error::Error; +use std::{fs, path::Path}; + +pub const VERSION : (u8, u8, u8) = (0, 1, 0); + +pub fn parse<P: AsRef<Path>>(string : String, source : Option<P>) + -> Result<parser::ParseTree, Box<dyn Error>> { + let tokens = lexer::lex(string, source)?; + let tree = parser::parse_stream(tokens)?; + Ok(tree) +} + +pub fn parse_file(path : &Path) + -> Result<parser::ParseTree, Box<dyn Error>> { + let contents = fs::read_to_string(&path)?; + parse(contents, Some(&path)) +} + +pub fn main() { + eprintln!("Library main function should not be used."); + std::process::exit(1); +} diff --git a/src/parse/expander.rs b/src/parse/expander.rs diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs @@ -0,0 +1,146 @@ +use super::tokens::{self, Token, TokenStream}; + +use std::path::Path; +use std::{fmt, error::Error}; + +#[derive(Debug, Clone)] +pub struct LexError(Token, String); + +impl fmt::Display for LexError { + fn fmt(&self, f : &mut fmt::Formatter<'_>) 
-> fmt::Result { + write!(f, "[**] Lexical Error: `{}'.\nAt: {:#?}", + self.1, self.0) + } +} + +impl Error for LexError { } + +fn character_kind(character : char, prev : Option<tokens::Kind>) + -> Option<tokens::Kind> { + let kind = match character { + '\n' | '\r' | ' ' | '\t' => None, + '(' => Some(tokens::Kind::LParen), + ')' => Some(tokens::Kind::RParen), + '0'..='9' => Some(tokens::Kind::Number), + ':' => Some(tokens::Kind::Keyword), + '"' => Some(tokens::Kind::String), + _ => Some(tokens::Kind::Symbol) + }; + + if prev == Some(tokens::Kind::String) { + if character == '"' { + None + } else { + prev + } + } else { + kind + } +} + +pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) + -> Result<TokenStream, LexError> { + + let eof = string.len(); + let mut lines : usize = 1; + let mut bytes : usize = 0; + let mut line_bytes : usize = 0; + + let mut accumulator : Vec<u8> = Vec::new(); + let mut tokens = Vec::new(); + + let mut token_start : usize = 0; + let mut current_kind = None; + let mut old_kind = None; + + while bytes < eof { + let current_byte = string.as_bytes()[bytes]; + + if !string.is_char_boundary(bytes) { + accumulator.push(current_byte); + bytes += 1; + line_bytes += 1; + continue; + } + + let character = current_byte as char; + + let mut prev_kind = current_kind; + current_kind = character_kind(character, current_kind); + + let string_start = character == '"' + && prev_kind != Some(tokens::Kind::String); + if string_start { + current_kind = None; + } + + let mut peek_kind = if bytes == eof - 1 { + None + } else { + let peek_char = string.as_bytes()[bytes + 1] as char; + character_kind(peek_char, current_kind) + }; + + let was_lparen = current_kind == Some(tokens::Kind::LParen); + let was_rparen = current_kind == Some(tokens::Kind::RParen); + let peek_rparen = peek_kind == Some(tokens::Kind::RParen); + if was_lparen || was_rparen { + peek_kind = None; + prev_kind = None; + } + if peek_rparen { + peek_kind = None; + } + + if let 
Some(kind_current) = current_kind { + if prev_kind.is_none() { + old_kind = current_kind; + token_start = line_bytes; + } + accumulator.push(current_byte); + bytes += 1; + line_bytes += 1; + + if peek_kind.is_none() { + let kind = if let Some(kind_old) = old_kind { + kind_old + } else { + kind_current + }; + + let mut span = accumulator.len(); + if kind == tokens::Kind::String { + span += 2; + } + + let value = String::from_utf8(accumulator).unwrap(); + let site = tokens::Site::from_line(lines, token_start, span); + tokens.push(Token::new(kind, value, site)); + accumulator = Vec::new(); + + if was_lparen || peek_rparen || was_rparen { + old_kind = None; + current_kind = None; + token_start = line_bytes; + } + + } + } else { + bytes += 1; + line_bytes += 1; + } + + if character == '\n' { + line_bytes = 0; + token_start = 0; + lines += 1; + } + if string_start { + current_kind = Some(tokens::Kind::String); + old_kind = current_kind; + token_start = line_bytes - 1; + } + } + + Ok(tokens) +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs @@ -0,0 +1,7 @@ +pub mod tokens; + +pub mod lexer; + +pub mod parser; + +pub mod expander; diff --git a/src/parse/parser.rs b/src/parse/parser.rs @@ -0,0 +1,148 @@ +use std::{fmt, error::Error}; +use super::tokens::{self, Kind, Site, Token}; + +#[derive(Debug, Clone)] +pub struct Node { + pub site : Site, + pub value : String +} + +impl Node { + pub fn new(value : &str, site : &Site) -> Self { + Self { + site: site.to_owned(), + value: value.to_owned() + } + } +} + +#[derive(Debug, Clone)] +pub struct AttributeNode { + pub keyword : String, + pub node : Box<ParseNode> +} + +#[derive(Debug, Clone)] +pub enum ParseNode { + Symbol(Node), + Number(Node), + String(Node), + List(Vec<ParseNode>), + Attribute(AttributeNode) +} + +impl ParseNode { + pub fn atomic(&self) -> Option<Node> { + match self { + Self::Symbol(node) + | Self::Number(node) + | Self::String(node) => Some(node.to_owned()), + _ => None + } + } +} + +pub type ParseTree = 
Vec<ParseNode>; + +#[derive(Debug, Clone)] +pub struct ParseError(pub String, pub Site); + +impl fmt::Display for ParseError { + fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[**] Parse Error: `{}',\nAt: {:#?}", + self.0, self.1) + } +} + +impl Error for ParseError { } + +fn parse_atomic(token : &Token) -> Result<ParseNode, ParseError> { + let node = Node::new(&token.value, &token.site); + match token.kind { + Kind::Symbol => Ok(ParseNode::Symbol(node)), + Kind::String => Ok(ParseNode::String(node)), + Kind::Number => Ok(ParseNode::Number(node)), + _ => Err(ParseError( + String::from("Atomic token not found here."), + token.site.clone())) + } +} + +pub fn parse(tokens : &[Token]) + -> Result<(ParseNode, &[Token]), ParseError> { + let token = &tokens[0]; + match token.kind { + Kind::LParen => { + // Parse list. + let mut slice = &tokens[1..]; + let mut elements = Vec::new(); + let mut token = &slice[0]; + loop { + if slice.is_empty() { + return Err(ParseError( + "Expected `)' (closing parenthesis), got EOF." + .to_owned(), token.site.clone())); + } + token = &slice[0]; + if token.kind == Kind::RParen + { break; } + + let (element, left) = parse(&slice)?; + elements.push(element); + slice = left; + } + slice = &slice[1..]; // Ignore last r-paren. + Ok((ParseNode::List(elements), slice)) + }, + Kind::Keyword => { + // Parse second token, make attribute. + let (node, slice) = parse(&tokens[1..])?; + let attribute = AttributeNode { + keyword: token.value[1..].to_owned(), + node: Box::new(node) + }; + Ok((ParseNode::Attribute(attribute), slice)) + }, + Kind::RParen => { + Err(ParseError("Unexpected `)' (closing parenthesis). \ + Perhaps you forgot an opening parenthesis?".to_owned(), + token.site.clone())) + }, + _ => { // Any atomic tokens. 
+ Ok((parse_atomic(&token)?, &tokens[1..])) + } + } +} + +pub fn parse_stream(tokens: tokens::TokenStream) + -> Result<ParseTree, ParseError> { + let mut tree = Vec::new(); + let mut slice = &tokens[..]; + while !slice.is_empty() { + let (node, next) = parse(slice)?; + tree.push(node); + slice = next; + } + Ok(tree) +} + +/// Pretty printing for parse nodes. +impl fmt::Display for ParseNode { + fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ParseNode::Symbol(node) + | ParseNode::Number(node) => write!(f, "{}", &node.value), + ParseNode::String(node) => write!(f, "\"{}\"", &node.value), + ParseNode::Attribute(attr) => write!(f, ":{} {}", + &attr.keyword, &*attr.node), + ParseNode::List(list) => write!(f, "({}{})", &list[0], + list[1..].iter().fold(String::new(), |acc, elem| { + let nested = elem.to_string().split('\n') + .fold(String::new(), |acc, e| + acc + "\n " + &e); + acc + &nested + })) + } + } +} + diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs @@ -0,0 +1,54 @@ +#[derive(Debug, Clone)] +pub struct Site { + pub source : Option<String>, + pub line : usize, + pub bytes_from_start : usize, + pub bytes_span : usize, +} + +impl Site { + pub fn new(source : String, line : usize, + bytes_from_start : usize, + bytes_span : usize) -> Self { + Self { + source: Some(source), + line, bytes_from_start, + bytes_span + } + } + + pub fn from_line(line : usize, + bytes_from_start : usize, + bytes_span : usize) -> Self { + Self { + source: None, + line, bytes_from_start, + bytes_span + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Kind { + LParen, + RParen, + Symbol, + String, + Number, + Keyword, +} + +#[derive(Debug, Clone)] +pub struct Token { + pub kind : Kind, + pub value : String, + pub site : Site, +} + +impl Token { + pub fn new(kind : Kind, value : String, site : Site) -> Self { + Self { kind, value, site } + } +} + +pub type TokenStream = Vec<Token>; diff --git a/test.html b/test.html @@ -0,0 +1,21 @@ +<!DOCTYPE 
html><html> +<head> +<title> + Example HTML Document +</title> +</head><body> +<p id="hello"> + Hello, World! +</p><p> + something something text... +</p><h1> + A (big) Header! +</h1><p> + Yet some more<span style="color: red"> + text +</span> <3 +</p><img alt="Cute Cat" src="https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" width="300"> + +</img> +</body> +</html> diff --git a/test.sex b/test.sex @@ -0,0 +1,15 @@ +(!DOCTYPE html) +(html + (head + (title Example HTML Document)) + (body + (p :id hello Hello, World!) + (p something something text...) + (h1 "A (big) Header!") + (p Yet some more + (span :style "color: red" text) <3) + (img + :alt "Cute Cat" + :src "https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" + :width 300))) +