valhallac

Compiler for a set-theoretic programming language.
git clone git://git.knutsen.co/valhallac
Log | Files | Refs | README | LICENSE

commit 5f94308ea77cdb81118b9eb1be4cf577a8fb33f2
parent b73354ec6147294a693bf08e5388741a8a16a9d7
Author: Demonstrandum <moi@knutsen.co>
Date:   Sun, 21 Jul 2019 17:49:24 +0100

Currying OPs and comments.

Diffstat:
M README.md | 12 ++++++------
A samples/currying_infix.vh | 4 ++++
M src/syntax/ast.rs | 30 +++++++++++++++++++-----------
M src/syntax/lexer.rs | 74 ++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M src/syntax/parser.rs | 26 +++++++++++++++++++++++---
M test.vh | 7 ++++---
6 files changed, 104 insertions(+), 49 deletions(-)

diff --git a/README.md b/README.md @@ -9,18 +9,18 @@ bytecode compilation) which understands the syntax and semantics, as well as doing static type analysis and code optimisation. The generated AST is then compiled to Brokkr bytecode. -The execution of the subsequential bytecode -is handled by the langauge's VM (virtual machine) called -Brokkr, which exists seperately. +The execution of the subsequent bytecode +is handled by the language's VM (virtual machine) called +Brokkr, which exists separately. Valhalla is a set theoretic programming language. -That's to say, it's based on priciples from set theory, +That's to say, it's based on principles from set theory, in a way that all types are just sets, and hence everything is just an element of a set. The language is meant to give a new way to think about types, and provides an intuitive way to think about types. It may also be used to verify proofs and such about set theory. -The language is a general purpose, but intead of being all OOP, +The language is a general purpose, but instead of being all OOP, or functional, etc., it's just set theory based. From what I've -gathered, it's not a very popular paradigme. +gathered, it's not a very popular paradigm. diff --git a/samples/currying_infix.vh b/samples/currying_infix.vh @@ -0,0 +1,3 @@ +(2 + 3) -- Eq. of: ((2 +) 3), can write: ((+ 3) 2) +(2 +) +(+ 3) -- Partial application+ \ No newline at end of file diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs @@ -139,6 +139,8 @@ pub struct BlockNode { pub statements : Vec<Nodes> } +pub struct EmptyNode; + /// All node types. 
pub enum Nodes { Ident(IdentNode), @@ -146,7 +148,8 @@ pub enum Nodes { Str(StrNode), Sym(SymNode), Call(CallNode), - Block(BlockNode) + Block(BlockNode), + Empty(EmptyNode), } @@ -161,6 +164,7 @@ impl fmt::Display for Nodes { "%call{{\n :callee ({})\n :operands [|\n {}\n |]\n}}", node.callee, node.operands.iter().map(Nodes::to_string).collect::<Vec<String>>().join("\n ")), Nodes::Block(node) => format!("%block{{ ... }}"), + Nodes::Empty(_) => String::from("()"), }; write!(f, "{}", printable) } @@ -190,8 +194,8 @@ impl Nodes { Nodes::Num(_) => true, Nodes::Str(_) => true, Nodes::Sym(_) => true, - Nodes::Call(_) => false, - Nodes::Block(_) => false, + Nodes::Empty(_) => true, + _ => false } } } @@ -224,6 +228,10 @@ impl CallNode { } } +impl EmptyNode { + pub fn new() -> Nodes { Nodes::Empty(EmptyNode { }) } +} + /// Root branch of the AST. pub struct Root { pub branches : Vec<Nodes> @@ -240,17 +248,17 @@ const TAB : &str = " "; pub fn pretty_print(node : &Nodes, depth : usize) -> String { let tab = TAB.repeat(depth); let printable = match node { - Nodes::Ident(_) => format!("{}{}", tab, node), - Nodes::Num(_) => format!("{}{}", tab, node), - Nodes::Str(_) => format!("{}{}", tab, node), - Nodes::Sym(_) => format!("{}{}", tab, node), - Nodes::Call(n) => format!( - "{tab}%call{{\n{tab}{T}:callee (\n{calling}\n{tab}{T})\n{tab}{T}:operands [|\n{ops}\n{tab}{T}|]\n{tab}}}", + Nodes::Call(n) => format!( + "{tab}%call{{\n{tab}{T}:callee (\n{calling}\n{tab}{T})\n{tab}{T}:operand [|{op}|]\n{tab}}}", tab=tab, T=TAB, calling=pretty_print(&*n.callee, depth + 2), - ops=n.operands.iter().map(|e| pretty_print(e, depth + 2)).collect::<Vec<String>>().join("\n") + op=(if n.operands.is_empty() { String::from(" ") } else { format!( + "\n{ops}\n{tab}{T}", + ops=pretty_print(&n.operands[0], depth + 2), + tab=tab, T=TAB) }) ), - Nodes::Block(n) => format!("%block{{ ... }}"), + Nodes::Block(n) => format!("%block{{ ... 
}}"), + _ => format!("{}{}", tab, node) }; printable } diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs @@ -49,13 +49,13 @@ lazy_static! { macro_rules! try_match { ($stream:expr, $partial:expr, $reg:expr, $token_type:expr, - $current_char:expr, $line:expr, $col:expr) => { + $current_char_ptr:expr, $line:expr, $col:expr) => { if let Some(matched) = $reg.first_match($partial) { let span = matched.width() as u32; $stream.push(Token::new( $token_type, &matched, location::new($line, $col, span))); - $current_char += matched.len(); + $current_char_ptr += matched.len(); $col += span; continue; } @@ -67,42 +67,64 @@ macro_rules! try_match { pub fn lex(string : &str) -> Vec<Token> { let mut token_stream : Vec<Token> = Vec::new(); - let mut current_char = 0; + let mut current_char_ptr = 0; let string_size = string.bytes().count(); let mut partial : &str; let mut line = 1; let mut col = 1; - while current_char < string_size { - if let Some(slice) = &string.get(current_char..) { + // Step through + while current_char_ptr < string_size { + // Align to character boundary. + if let Some(slice) = &string.get(current_char_ptr..) { partial = slice; } else { // Not on boundary yet. 
- current_char += 1; + current_char_ptr += 1; continue; } - let maybe_vec = &partial.get(0..2).unwrap_or(""); - let vec_brack = match maybe_vec { - &"[|" => Some(TokenType::LVec), - &"|]" => Some(TokenType::RVec), + + let two_chars = partial.get(0..2).unwrap_or("\0\0"); + + // Consume EON comment: + if two_chars.chars().nth(0).unwrap() == '#' || two_chars == "--" { + let old_char_ptr = current_char_ptr; + current_char_ptr += if two_chars == "--" { 2 } else { 1 }; + loop { + let current_char = string.bytes().nth(current_char_ptr).unwrap_or(b'\0'); + if current_char == b'\n' || current_char == b'\0' { + break; + } + current_char_ptr += 1; + } + col += string.get(old_char_ptr..current_char_ptr) + .expect("Comment ended or started not on char boundary.") + .width() as u32; + + continue; + } + + let vec_brack = match two_chars { + "[|" => Some(TokenType::LVec), + "|]" => Some(TokenType::RVec), _ => None }; if let Some(tt) = vec_brack { token_stream.push(Token::new( - tt, maybe_vec, + tt, two_chars, location::new(line, col, 2))); col += 2; - current_char += 2; + current_char_ptr += 2; continue; } - if *maybe_vec == ": " { + if two_chars == ": " { token_stream.push(Token::new( TokenType::Op, ":", location::new(line, col, 1))); col += 2; - current_char += 2; + current_char_ptr += 2; continue; } @@ -130,7 +152,7 @@ pub fn lex(string : &str) -> Vec<Token> { } else { col += 1; } - current_char += 1; + current_char_ptr += 1; continue; } @@ -143,7 +165,7 @@ pub fn lex(string : &str) -> Vec<Token> { while !eos { // Spaghet if let Some(character) = partial.chars().nth(i) { if character == '"' { - current_char += 1; + current_char_ptr += 1; col += 1; eos = true; } else if character == '\\' { @@ -156,10 +178,10 @@ pub fn lex(string : &str) -> Vec<Token> { 'b' => String::from("\x08"), '0' => String::from("\0"), 'x' => { - if let Some(code) = partial.get((current_char + 2)..(current_char + 4)) { + if let Some(code) = partial.get((current_char_ptr + 2)..(current_char_ptr + 4)) { i += 
2; col += 2; - current_char += 2; + current_char_ptr += 2; (u8::from_str_radix(code, 16).expect("Malformed hex.") as char).to_string() } else { String::new() } } @@ -167,7 +189,7 @@ pub fn lex(string : &str) -> Vec<Token> { }; i += 1; col += 1; - current_char += 1; + current_char_ptr += 1; contents.push_str(&escaped); continue; } else { @@ -178,7 +200,7 @@ pub fn lex(string : &str) -> Vec<Token> { contents.push(character); i += 1; col += character.width().unwrap_or(2) as u32; - current_char += character.len_utf8(); + current_char_ptr += character.len_utf8(); continue; } } else { @@ -186,7 +208,7 @@ pub fn lex(string : &str) -> Vec<Token> { // Error: Unexpected EOS! } i += 1; - current_char += 1; + current_char_ptr += 1; col += 1; } token_stream.push(Token::new( @@ -197,21 +219,21 @@ pub fn lex(string : &str) -> Vec<Token> { try_match!(token_stream, partial, NUM, TokenType::Num, - current_char, line, col); + current_char_ptr, line, col); try_match!(token_stream, partial, OP, TokenType::Op, - current_char, line, col); + current_char_ptr, line, col); try_match!(token_stream, partial, IDENT, TokenType::Ident, - current_char, line, col); + current_char_ptr, line, col); try_match!(token_stream, partial, SYM, TokenType::Sym, - current_char, line, col); + current_char_ptr, line, col); - current_char += 1; + current_char_ptr += 1; if partial.is_char_boundary(0) { col += 1 } } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs @@ -49,10 +49,19 @@ impl ParseEnvironment { fn null_den(&mut self, token : &Token) -> Nodes { match token.class { TokenType::Ident => ast::IdentNode::new(&token.string), - TokenType::Op => { // Prefix Op. 
+ TokenType::Op => { let is_op = self.optable.exists(&token.string); if is_op { - return ast::CallNode::new(ast::IdentNode::new(&token.string), vec![self.expr(300)]); + return match self.stream[0].class { + TokenType::RParen => { + ast::CallNode::new(ast::IdentNode::new(&token.string), vec![]) + }, + _ => ast::CallNode::new( + ast::CallNode::new( + ast::IdentNode::new(&token.string), + vec![]), + vec![self.expr(500)]) + }; } issue!(err::Types::ParseError, self.file, token, "`{}` is not an operator.", token.string); @@ -60,6 +69,13 @@ impl ParseEnvironment { TokenType::Num => ast::NumNode::new(&*token.string), TokenType::Str => ast::StrNode::new(&token.string), TokenType::LParen => { + let current = self.stream.get(0); + if current.is_none() || current.unwrap().class == TokenType::EOF { + self.expect(TokenType::RParen, current) + } else if current.unwrap().class == TokenType::RParen { + self.stream.remove(0); + return ast::EmptyNode::new(); + } let expr = self.expr(0); self.expect(TokenType::RParen, self.stream.get(0)); self.stream.remove(0); @@ -93,8 +109,12 @@ impl ParseEnvironment { } fn left_den(&mut self, left : Nodes, op : operators::Operator) -> Nodes { + let first_appl = ast::CallNode::new(ast::IdentNode::new(op.name), vec![left]); + if self.stream[0].class == TokenType::RParen { + return first_appl; + } let right = self.expr(op.precedence - (if op.is_right() { 1 } else { 0 })); - ast::CallNode::new(ast::IdentNode::new(op.name), vec![left, right]) + ast::CallNode::new(first_appl, vec![right]) } fn expect(&self, tt : TokenType, maybe_t : Option<&Token>) { diff --git a/test.vh b/test.vh @@ -1,2 +1,3 @@ -f : A -> B -> C -a = n + 3- \ No newline at end of file +(2 + 3) -- Eq. of: ((2 +) 3), can write: ((+ 3) 2) +(2 +) +(+ 3) -- Partial application+ \ No newline at end of file